OSDN Git Service

610e683493a173d7bf116e6e295df822965fc21e
[coroid/ffmpeg_saccubus.git] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
63 static uint8_t default_fcode_tab[MAX_MV*2+1];
64
65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
66                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
67 {
68     int qscale;
69     int shift=0;
70
71     for(qscale=qmin; qscale<=qmax; qscale++){
72         int i;
73         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
74             dsp->fdct == ff_jpeg_fdct_islow_10
75 #ifdef FAAN_POSTSCALE
76             || dsp->fdct == ff_faandct
77 #endif
78             ) {
79             for(i=0;i<64;i++) {
80                 const int j= dsp->idct_permutation[i];
81                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
82                 /* 19952             <= ff_aanscales[i] * qscale * quant_matrix[i]               <= 249205026 */
83                 /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1 << 36) / 249205026 */
84                 /* 3444240           >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
85
86                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
87                                 (qscale * quant_matrix[j]));
88             }
89         } else if (dsp->fdct == fdct_ifast
90 #ifndef FAAN_POSTSCALE
91                    || dsp->fdct == ff_faandct
92 #endif
93                    ) {
94             for(i=0;i<64;i++) {
95                 const int j= dsp->idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
97                 /* 19952             <= ff_aanscales[i] * qscale * quant_matrix[i]               <= 249205026 */
98                 /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
99                 /* 3444240           >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
100
101                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
102                                 (ff_aanscales[i] * qscale * quant_matrix[j]));
103             }
104         } else {
105             for(i=0;i<64;i++) {
106                 const int j= dsp->idct_permutation[i];
107                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
108                    So 16           <= qscale * quant_matrix[i]             <= 7905
109                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
110                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
111                 */
112                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
113 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
114                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
115
116                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
117                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
118             }
119         }
120
121         for(i=intra; i<64; i++){
122             int64_t max= 8191;
123             if (dsp->fdct == fdct_ifast
124 #ifndef FAAN_POSTSCALE
125                    || dsp->fdct == ff_faandct
126 #endif
127                    ) {
128                 max = (8191LL*ff_aanscales[i]) >> 14;
129             }
130             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
131                 shift++;
132             }
133         }
134     }
135     if(shift){
136         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
137     }
138 }
139
140 static inline void update_qscale(MpegEncContext *s){
141     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
142     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
143
144     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
145 }
146
147 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
148     int i;
149
150     if(matrix){
151         put_bits(pb, 1, 1);
152         for(i=0;i<64;i++) {
153             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
154         }
155     }else
156         put_bits(pb, 1, 0);
157 }
158
159 /**
160  * init s->current_picture.qscale_table from s->lambda_table
161  */
162 void ff_init_qscale_tab(MpegEncContext *s){
163     int8_t * const qscale_table = s->current_picture.f.qscale_table;
164     int i;
165
166     for(i=0; i<s->mb_num; i++){
167         unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ];
168         int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
169         qscale_table[ s->mb_index2xy[i] ]= av_clip(qp, s->avctx->qmin, s->avctx->qmax);
170     }
171 }
172
173 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
174     int i;
175
176     dst->pict_type              = src->pict_type;
177     dst->quality                = src->quality;
178     dst->coded_picture_number   = src->coded_picture_number;
179     dst->display_picture_number = src->display_picture_number;
180 //    dst->reference              = src->reference;
181     dst->pts                    = src->pts;
182     dst->interlaced_frame       = src->interlaced_frame;
183     dst->top_field_first        = src->top_field_first;
184
185     if(s->avctx->me_threshold){
186         if(!src->motion_val[0])
187             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
188         if(!src->mb_type)
189             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
190         if(!src->ref_index[0])
191             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
192         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
193             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
194             src->motion_subsample_log2, dst->motion_subsample_log2);
195
196         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
197
198         for(i=0; i<2; i++){
199             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
200             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
201
202             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
203                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
204             }
205             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
206                 memcpy(dst->ref_index[i], src->ref_index[i], s->mb_stride*4*s->mb_height*sizeof(int8_t));
207             }
208         }
209     }
210 }
211
212 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
213 #define COPY(a) dst->a= src->a
214     COPY(pict_type);
215     COPY(current_picture);
216     COPY(f_code);
217     COPY(b_code);
218     COPY(qscale);
219     COPY(lambda);
220     COPY(lambda2);
221     COPY(picture_in_gop_number);
222     COPY(gop_picture_number);
223     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
224     COPY(progressive_frame); //FIXME don't set in encode_header
225     COPY(partitioned_frame); //FIXME don't set in encode_header
226 #undef COPY
227 }
228
229 /**
230  * sets the given MpegEncContext to defaults for encoding.
231  * the changed fields will not depend upon the prior state of the MpegEncContext.
232  */
233 static void MPV_encode_defaults(MpegEncContext *s){
234     int i;
235     MPV_common_defaults(s);
236
237     for(i=-16; i<16; i++){
238         default_fcode_tab[i + MAX_MV]= 1;
239     }
240     s->me.mv_penalty= default_mv_penalty;
241     s->fcode_tab= default_fcode_tab;
242 }
243
244 /* init video encoder */
245 av_cold int MPV_encode_init(AVCodecContext *avctx)
246 {
247     MpegEncContext *s = avctx->priv_data;
248     int i;
249     int chroma_h_shift, chroma_v_shift;
250
251     MPV_encode_defaults(s);
252
253     switch (avctx->codec_id) {
254     case CODEC_ID_MPEG2VIDEO:
255         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
256             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
257             return -1;
258         }
259         break;
260     case CODEC_ID_LJPEG:
261         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P && avctx->pix_fmt != PIX_FMT_YUVJ444P && avctx->pix_fmt != PIX_FMT_BGRA &&
262            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P && avctx->pix_fmt != PIX_FMT_YUV444P) || avctx->strict_std_compliance>FF_COMPLIANCE_UNOFFICIAL)){
263             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
264             return -1;
265         }
266         break;
267     case CODEC_ID_MJPEG:
268         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
269            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_UNOFFICIAL)){
270             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
271             return -1;
272         }
273         break;
274     default:
275         if(avctx->pix_fmt != PIX_FMT_YUV420P){
276             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
277             return -1;
278         }
279     }
280
281     switch (avctx->pix_fmt) {
282     case PIX_FMT_YUVJ422P:
283     case PIX_FMT_YUV422P:
284         s->chroma_format = CHROMA_422;
285         break;
286     case PIX_FMT_YUVJ420P:
287     case PIX_FMT_YUV420P:
288     default:
289         s->chroma_format = CHROMA_420;
290         break;
291     }
292
293     s->bit_rate = avctx->bit_rate;
294     s->width = avctx->width;
295     s->height = avctx->height;
296     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
297         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
298         avctx->gop_size=600;
299     }
300     s->gop_size = avctx->gop_size;
301     s->avctx = avctx;
302     s->flags= avctx->flags;
303     s->flags2= avctx->flags2;
304     s->max_b_frames= avctx->max_b_frames;
305     s->codec_id= avctx->codec->id;
306     s->luma_elim_threshold  = avctx->luma_elim_threshold;
307     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
308     s->strict_std_compliance= avctx->strict_std_compliance;
309 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
310     if (avctx->flags & CODEC_FLAG_PART)
311         s->data_partitioning = 1;
312 #endif
313     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
314     s->mpeg_quant= avctx->mpeg_quant;
315     s->rtp_mode= !!avctx->rtp_payload_size;
316     s->intra_dc_precision= avctx->intra_dc_precision;
317     s->user_specified_pts = AV_NOPTS_VALUE;
318
319     if (s->gop_size <= 1) {
320         s->intra_only = 1;
321         s->gop_size = 12;
322     } else {
323         s->intra_only = 0;
324     }
325
326     s->me_method = avctx->me_method;
327
328     /* Fixed QSCALE */
329     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
330
331     s->adaptive_quant= (   s->avctx->lumi_masking
332                         || s->avctx->dark_masking
333                         || s->avctx->temporal_cplx_masking
334                         || s->avctx->spatial_cplx_masking
335                         || s->avctx->p_masking
336                         || s->avctx->border_masking
337                         || (s->flags&CODEC_FLAG_QP_RD))
338                        && !s->fixed_qscale;
339
340     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
341 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
342     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
343     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
344     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
345     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
346 #endif
347
348     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
349         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
350         return -1;
351     }
352
353     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
354         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
355     }
356
357     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
358         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
359         return -1;
360     }
361
362     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
363         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
364         return -1;
365     }
366
367     if(avctx->rc_max_rate && avctx->rc_max_rate == avctx->bit_rate && avctx->rc_max_rate != avctx->rc_min_rate){
368         av_log(avctx, AV_LOG_INFO, "impossible bitrate constraints, this will fail\n");
369     }
370
371     if(avctx->rc_buffer_size && avctx->bit_rate*(int64_t)avctx->time_base.num > avctx->rc_buffer_size * (int64_t)avctx->time_base.den){
372         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
373         return -1;
374     }
375
376     if(!s->fixed_qscale && avctx->bit_rate*av_q2d(avctx->time_base) > avctx->bit_rate_tolerance){
377         av_log(avctx, AV_LOG_ERROR, "bitrate tolerance too small for bitrate\n");
378         return -1;
379     }
380
381     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
382        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
383        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
384
385         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
386     }
387
388     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
389        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
390         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
391         return -1;
392     }
393
394     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
395         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
396         return -1;
397     }
398
399 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
400     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
401         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
402         return -1;
403     }
404 #endif
405
406     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
407         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
408         return -1;
409     }
410
411 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
412     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
413         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
414         return -1;
415     }
416 #endif
417
418     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
419         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
420         return -1;
421     }
422
423     if ((s->codec_id == CODEC_ID_MPEG4 || s->codec_id == CODEC_ID_H263 ||
424          s->codec_id == CODEC_ID_H263P) &&
425         (avctx->sample_aspect_ratio.num > 255 || avctx->sample_aspect_ratio.den > 255)) {
426         av_log(avctx, AV_LOG_ERROR, "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
427                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
428         return -1;
429     }
430
431     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
432        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
433         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
434         return -1;
435     }
436
437     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
438         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
439         return -1;
440     }
441
442     if((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis){
443         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
444         return -1;
445     }
446
447     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
448         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
449         return -1;
450     }
451
452     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
453         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection are not supported yet, set threshold to 1000000000\n");
454         return -1;
455     }
456
457     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
458         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
459         return -1;
460     }
461
462     if(s->flags & CODEC_FLAG_LOW_DELAY){
463         if (s->codec_id != CODEC_ID_MPEG2VIDEO){
464             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg2\n");
465             return -1;
466         }
467         if (s->max_b_frames != 0){
468             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
469             return -1;
470         }
471     }
472
473     if(s->q_scale_type == 1){
474 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
475         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
476             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
477             return -1;
478         }
479 #endif
480         if(avctx->qmax > 12){
481             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
482             return -1;
483         }
484     }
485
486     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
487        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
488        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
489         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
490         return -1;
491     }
492
493     if(s->avctx->thread_count < 1){
494         av_log(avctx, AV_LOG_ERROR, "automatic thread number detection not supported by codec, patch welcome\n");
495         return -1;
496     }
497
498     if(s->avctx->thread_count > 1)
499         s->rtp_mode= 1;
500
501     if(!avctx->time_base.den || !avctx->time_base.num){
502         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
503         return -1;
504     }
505
506     i= (INT_MAX/2+128)>>8;
507     if(avctx->me_threshold >= i){
508         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
509         return -1;
510     }
511     if(avctx->mb_threshold >= i){
512         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
513         return -1;
514     }
515
516     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
517         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
518         avctx->b_frame_strategy = 0;
519     }
520
521     i= av_gcd(avctx->time_base.den, avctx->time_base.num);
522     if(i > 1){
523         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
524         avctx->time_base.den /= i;
525         avctx->time_base.num /= i;
526 //        return -1;
527     }
528
529     if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || s->codec_id==CODEC_ID_MJPEG){
530         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
531         s->inter_quant_bias= 0;
532     }else{
533         s->intra_quant_bias=0;
534         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
535     }
536
537     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
538         s->intra_quant_bias= avctx->intra_quant_bias;
539     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
540         s->inter_quant_bias= avctx->inter_quant_bias;
541
542     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
543
544     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
545         av_log(avctx, AV_LOG_ERROR, "timebase %d/%d not supported by MPEG 4 standard, "
546                "the maximum admitted value for the timebase denominator is %d\n",
547                s->avctx->time_base.num, s->avctx->time_base.den, (1<<16)-1);
548         return -1;
549     }
550     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
551
552     switch(avctx->codec->id) {
553     case CODEC_ID_MPEG1VIDEO:
554         s->out_format = FMT_MPEG1;
555         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
556         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
557         break;
558     case CODEC_ID_MPEG2VIDEO:
559         s->out_format = FMT_MPEG1;
560         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
561         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
562         s->rtp_mode= 1;
563         break;
564     case CODEC_ID_LJPEG:
565     case CODEC_ID_MJPEG:
566         s->out_format = FMT_MJPEG;
567         s->intra_only = 1; /* force intra only for jpeg */
568         if(avctx->codec->id == CODEC_ID_LJPEG && avctx->pix_fmt == PIX_FMT_BGRA){
569             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
570             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
571             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
572         }else{
573             s->mjpeg_vsample[0] = 2;
574             s->mjpeg_vsample[1] = 2>>chroma_v_shift;
575             s->mjpeg_vsample[2] = 2>>chroma_v_shift;
576             s->mjpeg_hsample[0] = 2;
577             s->mjpeg_hsample[1] = 2>>chroma_h_shift;
578             s->mjpeg_hsample[2] = 2>>chroma_h_shift;
579         }
580         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER)
581             || ff_mjpeg_encode_init(s) < 0)
582             return -1;
583         avctx->delay=0;
584         s->low_delay=1;
585         break;
586     case CODEC_ID_H261:
587         if (!CONFIG_H261_ENCODER)  return -1;
588         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
589             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay=0;
594         s->low_delay=1;
595         break;
596     case CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)  return -1;
598         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format), s->width, s->height) == 8) {
599             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
600             return -1;
601         }
602         s->out_format = FMT_H263;
603         avctx->delay=0;
604         s->low_delay=1;
605         break;
606     case CODEC_ID_H263P:
607         s->out_format = FMT_H263;
608         s->h263_plus = 1;
609         /* Fx */
610 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
611         if (avctx->flags & CODEC_FLAG_H263P_UMV)
612             s->umvplus = 1;
613         if (avctx->flags & CODEC_FLAG_H263P_AIV)
614             s->alt_inter_vlc = 1;
615         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
616             s->h263_slice_structured = 1;
617 #endif
618         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
619         s->modified_quant= s->h263_aic;
620         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
621         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
622
623         /* /Fx */
624         /* These are just to be sure */
625         avctx->delay=0;
626         s->low_delay=1;
627         break;
628     case CODEC_ID_FLV1:
629         s->out_format = FMT_H263;
630         s->h263_flv = 2; /* format = 1; 11-bit codes */
631         s->unrestricted_mv = 1;
632         s->rtp_mode=0; /* don't allow GOB */
633         avctx->delay=0;
634         s->low_delay=1;
635         break;
636     case CODEC_ID_RV10:
637         s->out_format = FMT_H263;
638         avctx->delay=0;
639         s->low_delay=1;
640         break;
641     case CODEC_ID_RV20:
642         s->out_format = FMT_H263;
643         avctx->delay=0;
644         s->low_delay=1;
645         s->modified_quant=1;
646         s->h263_aic=1;
647         s->h263_plus=1;
648         s->loop_filter=1;
649         s->unrestricted_mv= 0;
650         break;
651     case CODEC_ID_MPEG4:
652         s->out_format = FMT_H263;
653         s->h263_pred = 1;
654         s->unrestricted_mv = 1;
655         s->low_delay= s->max_b_frames ? 0 : 1;
656         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
657         break;
658     case CODEC_ID_MSMPEG4V2:
659         s->out_format = FMT_H263;
660         s->h263_pred = 1;
661         s->unrestricted_mv = 1;
662         s->msmpeg4_version= 2;
663         avctx->delay=0;
664         s->low_delay=1;
665         break;
666     case CODEC_ID_MSMPEG4V3:
667         s->out_format = FMT_H263;
668         s->h263_pred = 1;
669         s->unrestricted_mv = 1;
670         s->msmpeg4_version= 3;
671         s->flipflop_rounding=1;
672         avctx->delay=0;
673         s->low_delay=1;
674         break;
675     case CODEC_ID_WMV1:
676         s->out_format = FMT_H263;
677         s->h263_pred = 1;
678         s->unrestricted_mv = 1;
679         s->msmpeg4_version= 4;
680         s->flipflop_rounding=1;
681         avctx->delay=0;
682         s->low_delay=1;
683         break;
684     case CODEC_ID_WMV2:
685         s->out_format = FMT_H263;
686         s->h263_pred = 1;
687         s->unrestricted_mv = 1;
688         s->msmpeg4_version= 5;
689         s->flipflop_rounding=1;
690         avctx->delay=0;
691         s->low_delay=1;
692         break;
693     default:
694         return -1;
695     }
696
697     avctx->has_b_frames= !s->low_delay;
698
699     s->encoding = 1;
700
701     s->progressive_frame=
702     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
703
704     /* init */
705     if (MPV_common_init(s) < 0)
706         return -1;
707
708     if(!s->dct_quantize)
709         s->dct_quantize = dct_quantize_c;
710     if(!s->denoise_dct)
711         s->denoise_dct = denoise_dct_c;
712     s->fast_dct_quantize = s->dct_quantize;
713     if(avctx->trellis)
714         s->dct_quantize = dct_quantize_trellis_c;
715
716     if((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
717         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
718
719     s->quant_precision=5;
720
721     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
722     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
723
724     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
725         ff_h261_encode_init(s);
726     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
727         h263_encode_init(s);
728     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
729         ff_msmpeg4_encode_init(s);
730     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
731         && s->out_format == FMT_MPEG1)
732         ff_mpeg1_encode_init(s);
733
734     /* init q matrix */
735     for(i=0;i<64;i++) {
736         int j= s->dsp.idct_permutation[i];
737         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
738             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
739             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
740         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
741             s->intra_matrix[j] =
742             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
743         }else
744         { /* mpeg1/2 */
745             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
746             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
747         }
748         if(s->avctx->intra_matrix)
749             s->intra_matrix[j] = s->avctx->intra_matrix[i];
750         if(s->avctx->inter_matrix)
751             s->inter_matrix[j] = s->avctx->inter_matrix[i];
752     }
753
754     /* precompute matrix */
755     /* for mjpeg, we do include qscale in the matrix */
756     if (s->out_format != FMT_MJPEG) {
757         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
758                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
759         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
760                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
761     }
762
763     if(ff_rate_control_init(s) < 0)
764         return -1;
765
766     return 0;
767 }
768
769 av_cold int MPV_encode_end(AVCodecContext *avctx)
770 {
771     MpegEncContext *s = avctx->priv_data;
772
773     ff_rate_control_uninit(s);
774
775     MPV_common_end(s);
776     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
777         ff_mjpeg_encode_close(s);
778
779     av_freep(&avctx->extradata);
780
781     return 0;
782 }
783
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride offset between vertically adjacent samples
 * @return sum over all 256 samples of |src[x + y*stride] - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
796
797 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
798     int x, y, w, h;
799     int acc=0;
800
801     w= s->width &~15;
802     h= s->height&~15;
803
804     for(y=0; y<h; y+=16){
805         for(x=0; x<w; x+=16){
806             int offset= x + y*stride;
807             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
808             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
809             int sae = get_sae(src + offset, mean, stride);
810
811             acc+= sae + 500 < sad;
812         }
813     }
814     return acc;
815 }
816
817
818 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
819     AVFrame *pic=NULL;
820     int64_t pts;
821     int i;
822     const int encoding_delay= s->max_b_frames;
823     int direct=1;
824
825     if(pic_arg){
826         pts= pic_arg->pts;
827         pic_arg->display_picture_number= s->input_picture_number++;
828
829         if(pts != AV_NOPTS_VALUE){
830             if(s->user_specified_pts != AV_NOPTS_VALUE){
831                 int64_t time= pts;
832                 int64_t last= s->user_specified_pts;
833
834                 if(time <= last){
835                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
836                     return -1;
837                 }
838             }
839             s->user_specified_pts= pts;
840         }else{
841             if(s->user_specified_pts != AV_NOPTS_VALUE){
842                 s->user_specified_pts=
843                 pts= s->user_specified_pts + 1;
844                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
845             }else{
846                 pts= pic_arg->display_picture_number;
847             }
848         }
849     }
850
851   if(pic_arg){
852     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
853     if(pic_arg->linesize[0] != s->linesize) direct=0;
854     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
855     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
856
857 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
858
859     if(direct){
860         i= ff_find_unused_picture(s, 1);
861
862         pic= (AVFrame*)&s->picture[i];
863         pic->reference= 3;
864
865         for(i=0; i<4; i++){
866             pic->data[i]= pic_arg->data[i];
867             pic->linesize[i]= pic_arg->linesize[i];
868         }
869         if(ff_alloc_picture(s, (Picture*)pic, 1) < 0){
870             return -1;
871         }
872     }else{
873         i= ff_find_unused_picture(s, 0);
874
875         pic= (AVFrame*)&s->picture[i];
876         pic->reference= 3;
877
878         if(ff_alloc_picture(s, (Picture*)pic, 0) < 0){
879             return -1;
880         }
881
882         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
883            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
884            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
885        // empty
886         }else{
887             int h_chroma_shift, v_chroma_shift;
888             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
889
890             for(i=0; i<3; i++){
891                 int src_stride= pic_arg->linesize[i];
892                 int dst_stride= i ? s->uvlinesize : s->linesize;
893                 int h_shift= i ? h_chroma_shift : 0;
894                 int v_shift= i ? v_chroma_shift : 0;
895                 int w= s->width >>h_shift;
896                 int h= s->height>>v_shift;
897                 uint8_t *src= pic_arg->data[i];
898                 uint8_t *dst= pic->data[i];
899
900                 if(!s->avctx->rc_buffer_size)
901                     dst +=INPLACE_OFFSET;
902
903                 if(src_stride==dst_stride)
904                     memcpy(dst, src, src_stride*h);
905                 else{
906                     while(h--){
907                         memcpy(dst, src, w);
908                         dst += dst_stride;
909                         src += src_stride;
910                     }
911                 }
912             }
913         }
914     }
915     copy_picture_attributes(s, pic, pic_arg);
916     pic->pts= pts; //we set this here to avoid modifiying pic_arg
917   }
918
919     /* shift buffer entries */
920     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
921         s->input_picture[i-1]= s->input_picture[i];
922
923     s->input_picture[encoding_delay]= (Picture*)pic;
924
925     return 0;
926 }
927
928 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
929     int x, y, plane;
930     int score=0;
931     int64_t score64=0;
932
933     for(plane=0; plane<3; plane++){
934         const int stride = p->f.linesize[plane];
935         const int bw= plane ? 1 : 2;
936         for(y=0; y<s->mb_height*bw; y++){
937             for(x=0; x<s->mb_width*bw; x++){
938                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0: 16;
939                 int v   = s->dsp.frame_skip_cmp[1](s, p->f.data[plane] + 8*(x + y*stride)+off, ref->f.data[plane] + 8*(x + y*stride), stride, 8);
940
941                 switch(s->avctx->frame_skip_exp){
942                     case 0: score= FFMAX(score, v); break;
943                     case 1: score+= FFABS(v);break;
944                     case 2: score+= v*v;break;
945                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
946                     case 4: score64+= v*v*(int64_t)(v*v);break;
947                 }
948             }
949         }
950     }
951
952     if(score) score64= score;
953
954     if(score64 < s->avctx->frame_skip_threshold)
955         return 1;
956     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
957         return 1;
958     return 0;
959 }
960
961 static int estimate_best_b_count(MpegEncContext *s){
962     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
963     AVCodecContext *c = avcodec_alloc_context3(NULL);
964     AVFrame input[FF_MAX_B_FRAMES+2];
965     const int scale= s->avctx->brd_scale;
966     int i, j, out_size, p_lambda, b_lambda, lambda2;
967     int outbuf_size= s->width * s->height; //FIXME
968     uint8_t *outbuf= av_malloc(outbuf_size);
969     int64_t best_rd= INT64_MAX;
970     int best_b_count= -1;
971
972     assert(scale>=0 && scale <=3);
973
974 //    emms_c();
975     p_lambda= s->last_lambda_for[AV_PICTURE_TYPE_P]; //s->next_picture_ptr->quality;
976     b_lambda= s->last_lambda_for[AV_PICTURE_TYPE_B]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
977     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
978     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
979
980     c->width = s->width >> scale;
981     c->height= s->height>> scale;
982     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
983     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
984     c->mb_decision= s->avctx->mb_decision;
985     c->me_cmp= s->avctx->me_cmp;
986     c->mb_cmp= s->avctx->mb_cmp;
987     c->me_sub_cmp= s->avctx->me_sub_cmp;
988     c->pix_fmt = PIX_FMT_YUV420P;
989     c->time_base= s->avctx->time_base;
990     c->max_b_frames= s->max_b_frames;
991
992     if (avcodec_open2(c, codec, NULL) < 0)
993         return -1;
994
995     for(i=0; i<s->max_b_frames+2; i++){
996         int ysize= c->width*c->height;
997         int csize= (c->width/2)*(c->height/2);
998         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
999
1000         avcodec_get_frame_defaults(&input[i]);
1001         input[i].data[0]= av_malloc(ysize + 2*csize);
1002         input[i].data[1]= input[i].data[0] + ysize;
1003         input[i].data[2]= input[i].data[1] + csize;
1004         input[i].linesize[0]= c->width;
1005         input[i].linesize[1]=
1006         input[i].linesize[2]= c->width/2;
1007
1008         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
1009             pre_input= *pre_input_ptr;
1010
1011             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1012                 pre_input.f.data[0] += INPLACE_OFFSET;
1013                 pre_input.f.data[1] += INPLACE_OFFSET;
1014                 pre_input.f.data[2] += INPLACE_OFFSET;
1015             }
1016
1017             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.f.data[0], pre_input.f.linesize[0], c->width,      c->height);
1018             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.f.data[1], pre_input.f.linesize[1], c->width >> 1, c->height >> 1);
1019             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.f.data[2], pre_input.f.linesize[2], c->width >> 1, c->height >> 1);
1020         }
1021     }
1022
1023     for(j=0; j<s->max_b_frames+1; j++){
1024         int64_t rd=0;
1025
1026         if(!s->input_picture[j])
1027             break;
1028
1029         c->error[0]= c->error[1]= c->error[2]= 0;
1030
1031         input[0].pict_type= AV_PICTURE_TYPE_I;
1032         input[0].quality= 1 * FF_QP2LAMBDA;
1033         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
1034 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1035
1036         for(i=0; i<s->max_b_frames+1; i++){
1037             int is_p= i % (j+1) == j || i==s->max_b_frames;
1038
1039             input[i+1].pict_type= is_p ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1040             input[i+1].quality= is_p ? p_lambda : b_lambda;
1041             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
1042             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1043         }
1044
1045         /* get the delayed frames */
1046         while(out_size){
1047             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1048             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1049         }
1050
1051         rd += c->error[0] + c->error[1] + c->error[2];
1052
1053         if(rd < best_rd){
1054             best_rd= rd;
1055             best_b_count= j;
1056         }
1057     }
1058
1059     av_freep(&outbuf);
1060     avcodec_close(c);
1061     av_freep(&c);
1062
1063     for(i=0; i<s->max_b_frames+2; i++){
1064         av_freep(&input[i].data[0]);
1065     }
1066
1067     return best_b_count;
1068 }
1069
1070 static int select_input_picture(MpegEncContext *s){
1071     int i;
1072
1073     for(i=1; i<MAX_PICTURE_COUNT; i++)
1074         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1075     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1076
1077     /* set next picture type & ordering */
1078     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1079         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1080             s->reordered_input_picture[0]= s->input_picture[0];
1081             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1082             s->reordered_input_picture[0]->f.coded_picture_number = s->coded_picture_number++;
1083         }else{
1084             int b_frames;
1085
1086             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
1087                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
1088                 //FIXME check that te gop check above is +-1 correct
1089 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->f.data[0], s->input_picture[0]->pts);
1090
1091                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1092                         for(i=0; i<4; i++)
1093                             s->input_picture[0]->f.data[i] = NULL;
1094                         s->input_picture[0]->f.type = 0;
1095                     }else{
1096                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
1097                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1098
1099                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
1100                     }
1101
1102                     emms_c();
1103                     ff_vbv_update(s, 0);
1104
1105                     goto no_output_pic;
1106                 }
1107             }
1108
1109             if(s->flags&CODEC_FLAG_PASS2){
1110                 for(i=0; i<s->max_b_frames+1; i++){
1111                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1112
1113                     if(pict_num >= s->rc_context.num_entries)
1114                         break;
1115                     if(!s->input_picture[i]){
1116                         s->rc_context.entry[pict_num-1].new_pict_type = AV_PICTURE_TYPE_P;
1117                         break;
1118                     }
1119
1120                     s->input_picture[i]->f.pict_type =
1121                         s->rc_context.entry[pict_num].new_pict_type;
1122                 }
1123             }
1124
1125             if(s->avctx->b_frame_strategy==0){
1126                 b_frames= s->max_b_frames;
1127                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1128             }else if(s->avctx->b_frame_strategy==1){
1129                 for(i=1; i<s->max_b_frames+1; i++){
1130                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1131                         s->input_picture[i]->b_frame_score=
1132                             get_intra_count(s, s->input_picture[i  ]->f.data[0],
1133                                                s->input_picture[i-1]->f.data[0], s->linesize) + 1;
1134                     }
1135                 }
1136                 for(i=0; i<s->max_b_frames+1; i++){
1137                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
1138                 }
1139
1140                 b_frames= FFMAX(0, i-1);
1141
1142                 /* reset scores */
1143                 for(i=0; i<b_frames+1; i++){
1144                     s->input_picture[i]->b_frame_score=0;
1145                 }
1146             }else if(s->avctx->b_frame_strategy==2){
1147                 b_frames= estimate_best_b_count(s);
1148             }else{
1149                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1150                 b_frames=0;
1151             }
1152
1153             emms_c();
1154 //static int b_count=0;
1155 //b_count+= b_frames;
1156 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1157
1158             for(i= b_frames - 1; i>=0; i--){
1159                 int type = s->input_picture[i]->f.pict_type;
1160                 if(type && type != AV_PICTURE_TYPE_B)
1161                     b_frames= i;
1162             }
1163             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B && b_frames == s->max_b_frames){
1164                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
1165             }
1166
1167             if(s->picture_in_gop_number + b_frames >= s->gop_size){
1168               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
1169                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
1170               }else{
1171                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
1172                     b_frames=0;
1173                 s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1174               }
1175             }
1176
1177             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
1178                && b_frames
1179                && s->input_picture[b_frames]->f.pict_type== AV_PICTURE_TYPE_I)
1180                 b_frames--;
1181
1182             s->reordered_input_picture[0]= s->input_picture[b_frames];
1183             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1184                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1185             s->reordered_input_picture[0]->f.coded_picture_number = s->coded_picture_number++;
1186             for(i=0; i<b_frames; i++){
1187                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1188                 s->reordered_input_picture[i + 1]->f.pict_type = AV_PICTURE_TYPE_B;
1189                 s->reordered_input_picture[i + 1]->f.coded_picture_number = s->coded_picture_number++;
1190             }
1191         }
1192     }
1193 no_output_pic:
1194     if(s->reordered_input_picture[0]){
1195         s->reordered_input_picture[0]->f.reference = s->reordered_input_picture[0]->f.pict_type!=AV_PICTURE_TYPE_B ? 3 : 0;
1196
1197         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1198
1199         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size) {
1200             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
1201
1202             int i= ff_find_unused_picture(s, 0);
1203             Picture *pic= &s->picture[i];
1204
1205             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1206             if(ff_alloc_picture(s, pic, 0) < 0){
1207                 return -1;
1208             }
1209
1210             /* mark us unused / free shared pic */
1211             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1212                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
1213             for(i=0; i<4; i++)
1214                 s->reordered_input_picture[0]->f.data[i] = NULL;
1215             s->reordered_input_picture[0]->f.type = 0;
1216
1217             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
1218
1219             s->current_picture_ptr= pic;
1220         }else{
1221             // input is not a shared pix -> reuse buffer for current_pix
1222
1223             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
1224                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1225
1226             s->current_picture_ptr= s->reordered_input_picture[0];
1227             for(i=0; i<4; i++){
1228                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1229             }
1230         }
1231         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1232
1233         s->picture_number = s->new_picture.f.display_picture_number;
1234 //printf("dpn:%d\n", s->picture_number);
1235     }else{
1236        memset(&s->new_picture, 0, sizeof(Picture));
1237     }
1238     return 0;
1239 }
1240
1241 int MPV_encode_picture(AVCodecContext *avctx,
1242                        unsigned char *buf, int buf_size, void *data)
1243 {
1244     MpegEncContext *s = avctx->priv_data;
1245     AVFrame *pic_arg = data;
1246     int i, stuffing_count, context_count = avctx->thread_count;
1247
1248     for(i=0; i<context_count; i++){
1249         int start_y= s->thread_context[i]->start_mb_y;
1250         int   end_y= s->thread_context[i]->  end_mb_y;
1251         int h= s->mb_height;
1252         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
1253         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
1254
1255         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1256     }
1257
1258     s->picture_in_gop_number++;
1259
1260     if(load_input_picture(s, pic_arg) < 0)
1261         return -1;
1262
1263     if(select_input_picture(s) < 0){
1264         return -1;
1265     }
1266
1267     /* output? */
1268     if (s->new_picture.f.data[0]) {
1269         s->pict_type = s->new_picture.f.pict_type;
1270 //emms_c();
1271 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1272         MPV_frame_start(s, avctx);
1273 vbv_retry:
1274         if (encode_picture(s, s->picture_number) < 0)
1275             return -1;
1276
1277         avctx->header_bits = s->header_bits;
1278         avctx->mv_bits     = s->mv_bits;
1279         avctx->misc_bits   = s->misc_bits;
1280         avctx->i_tex_bits  = s->i_tex_bits;
1281         avctx->p_tex_bits  = s->p_tex_bits;
1282         avctx->i_count     = s->i_count;
1283         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1284         avctx->skip_count  = s->skip_count;
1285
1286         MPV_frame_end(s);
1287
1288         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1289             ff_mjpeg_encode_picture_trailer(s);
1290
1291         if(avctx->rc_buffer_size){
1292             RateControlContext *rcc= &s->rc_context;
1293             int max_size= rcc->buffer_index * avctx->rc_max_available_vbv_use;
1294
1295             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
1296                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
1297                 if(s->adaptive_quant){
1298                     int i;
1299                     for(i=0; i<s->mb_height*s->mb_stride; i++)
1300                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
1301                 }
1302                 s->mb_skipped = 0;        //done in MPV_frame_start()
1303                 if(s->pict_type==AV_PICTURE_TYPE_P){ //done in encode_picture() so we must undo it
1304                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
1305                         s->no_rounding ^= 1;
1306                 }
1307                 if(s->pict_type!=AV_PICTURE_TYPE_B){
1308                     s->time_base= s->last_time_base;
1309                     s->last_non_b_time= s->time - s->pp_time;
1310                 }
1311 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1312                 for(i=0; i<context_count; i++){
1313                     PutBitContext *pb= &s->thread_context[i]->pb;
1314                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1315                 }
1316                 goto vbv_retry;
1317             }
1318
1319             assert(s->avctx->rc_max_rate);
1320         }
1321
1322         if(s->flags&CODEC_FLAG_PASS1)
1323             ff_write_pass1_stats(s);
1324
1325         for(i=0; i<4; i++){
1326             s->current_picture_ptr->f.error[i]  = s->current_picture.f.error[i];
1327             avctx->error[i]                        += s->current_picture_ptr->f.error[i];
1328         }
1329
1330         if(s->flags&CODEC_FLAG_PASS1)
1331             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
1332         flush_put_bits(&s->pb);
1333         s->frame_bits  = put_bits_count(&s->pb);
1334
1335         stuffing_count= ff_vbv_update(s, s->frame_bits);
1336         if(stuffing_count){
1337             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
1338                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1339                 return -1;
1340             }
1341
1342             switch(s->codec_id){
1343             case CODEC_ID_MPEG1VIDEO:
1344             case CODEC_ID_MPEG2VIDEO:
1345                 while(stuffing_count--){
1346                     put_bits(&s->pb, 8, 0);
1347                 }
1348             break;
1349             case CODEC_ID_MPEG4:
1350                 put_bits(&s->pb, 16, 0);
1351                 put_bits(&s->pb, 16, 0x1C3);
1352                 stuffing_count -= 4;
1353                 while(stuffing_count--){
1354                     put_bits(&s->pb, 8, 0xFF);
1355                 }
1356             break;
1357             default:
1358                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1359             }
1360             flush_put_bits(&s->pb);
1361             s->frame_bits  = put_bits_count(&s->pb);
1362         }
1363
1364         /* update mpeg1/2 vbv_delay for CBR */
1365         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
1366            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
1367             int vbv_delay, min_delay;
1368             double inbits = s->avctx->rc_max_rate*av_q2d(s->avctx->time_base);
1369             int    minbits= s->frame_bits - 8*(s->vbv_delay_ptr - s->pb.buf - 1);
1370             double bits   = s->rc_context.buffer_index + minbits - inbits;
1371
1372             if(bits<0)
1373                 av_log(s->avctx, AV_LOG_ERROR, "Internal error, negative bits\n");
1374
1375             assert(s->repeat_first_field==0);
1376
1377             vbv_delay=     bits * 90000                               / s->avctx->rc_max_rate;
1378             min_delay= (minbits * 90000LL + s->avctx->rc_max_rate - 1)/ s->avctx->rc_max_rate;
1379
1380             vbv_delay= FFMAX(vbv_delay, min_delay);
1381
1382             assert(vbv_delay < 0xFFFF);
1383
1384             s->vbv_delay_ptr[0] &= 0xF8;
1385             s->vbv_delay_ptr[0] |= vbv_delay>>13;
1386             s->vbv_delay_ptr[1]  = vbv_delay>>5;
1387             s->vbv_delay_ptr[2] &= 0x07;
1388             s->vbv_delay_ptr[2] |= vbv_delay<<3;
1389             avctx->vbv_delay = vbv_delay*300;
1390         }
1391         s->total_bits += s->frame_bits;
1392         avctx->frame_bits  = s->frame_bits;
1393     }else{
1394         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1395         s->frame_bits=0;
1396     }
1397     assert((s->frame_bits&7)==0);
1398
1399     return s->frame_bits/8;
1400 }
1401
1402 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
1403 {
1404     static const char tab[64]=
1405         {3,2,2,1,1,1,1,1,
1406          1,1,1,1,1,1,1,1,
1407          1,1,1,1,1,1,1,1,
1408          0,0,0,0,0,0,0,0,
1409          0,0,0,0,0,0,0,0,
1410          0,0,0,0,0,0,0,0,
1411          0,0,0,0,0,0,0,0,
1412          0,0,0,0,0,0,0,0};
1413     int score=0;
1414     int run=0;
1415     int i;
1416     DCTELEM *block= s->block[n];
1417     const int last_index= s->block_last_index[n];
1418     int skip_dc;
1419
1420     if(threshold<0){
1421         skip_dc=0;
1422         threshold= -threshold;
1423     }else
1424         skip_dc=1;
1425
1426     /* Are all we could set to zero already zero? */
1427     if(last_index<=skip_dc - 1) return;
1428
1429     for(i=0; i<=last_index; i++){
1430         const int j = s->intra_scantable.permutated[i];
1431         const int level = FFABS(block[j]);
1432         if(level==1){
1433             if(skip_dc && i==0) continue;
1434             score+= tab[run];
1435             run=0;
1436         }else if(level>1){
1437             return;
1438         }else{
1439             run++;
1440         }
1441     }
1442     if(score >= threshold) return;
1443     for(i=skip_dc; i<=last_index; i++){
1444         const int j = s->intra_scantable.permutated[i];
1445         block[j]=0;
1446     }
1447     if(block[0]) s->block_last_index[n]= 0;
1448     else         s->block_last_index[n]= -1;
1449 }
1450
1451 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
1452 {
1453     int i;
1454     const int maxlevel= s->max_qcoeff;
1455     const int minlevel= s->min_qcoeff;
1456     int overflow=0;
1457
1458     if(s->mb_intra){
1459         i=1; //skip clipping of intra dc
1460     }else
1461         i=0;
1462
1463     for(;i<=last_index; i++){
1464         const int j= s->intra_scantable.permutated[i];
1465         int level = block[j];
1466
1467         if     (level>maxlevel){
1468             level=maxlevel;
1469             overflow++;
1470         }else if(level<minlevel){
1471             level=minlevel;
1472             overflow++;
1473         }
1474
1475         block[j]= level;
1476     }
1477
1478     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1479         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
1480 }
1481
/**
 * Compute a per sample weight for an 8x8 block from its local activity.
 *
 * For every sample the neighbourhood of up to 3x3 samples (clipped to the
 * block) is examined; the weight is
 * 36 * ff_sqrt(count*sum(v^2) - sum(v)^2) / count,
 * where count is the number of samples in the neighbourhood.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride offset between vertically adjacent samples
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int cx, cy;
//FIXME optimize
    for (cy = 0; cy < 8; cy++) {
        const int top    = FFMAX(cy - 1, 0);
        const int bottom = FFMIN(8, cy + 2);

        for (cx = 0; cx < 8; cx++) {
            const int left  = FFMAX(cx - 1, 0);
            const int right = FFMIN(8, cx + 2);
            /* number of samples inside the clipped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[cx + 8 * cy] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1504
1505 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
1506 {
1507     int16_t weight[8][64];
1508     DCTELEM orig[8][64];
1509     const int mb_x= s->mb_x;
1510     const int mb_y= s->mb_y;
1511     int i;
1512     int skip_dct[8];
1513     int dct_offset   = s->linesize*8; //default for progressive frames
1514     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1515     int wrap_y, wrap_c;
1516
1517     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
1518
1519     if(s->adaptive_quant){
1520         const int last_qp= s->qscale;
1521         const int mb_xy= mb_x + mb_y*s->mb_stride;
1522
1523         s->lambda= s->lambda_table[mb_xy];
1524         update_qscale(s);
1525
1526         if(!(s->flags&CODEC_FLAG_QP_RD)){
1527             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1528             s->dquant= s->qscale - last_qp;
1529
1530             if(s->out_format==FMT_H263){
1531                 s->dquant= av_clip(s->dquant, -2, 2);
1532
1533                 if(s->codec_id==CODEC_ID_MPEG4){
1534                     if(!s->mb_intra){
1535                         if(s->pict_type == AV_PICTURE_TYPE_B){
1536                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
1537                                 s->dquant= 0;
1538                         }
1539                         if(s->mv_type==MV_TYPE_8X8)
1540                             s->dquant=0;
1541                     }
1542                 }
1543             }
1544         }
1545         ff_set_qscale(s, last_qp + s->dquant);
1546     }else if(s->flags&CODEC_FLAG_QP_RD)
1547         ff_set_qscale(s, s->qscale + s->dquant);
1548
1549     wrap_y = s->linesize;
1550     wrap_c = s->uvlinesize;
1551     ptr_y  = s->new_picture.f.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
1552     ptr_cb = s->new_picture.f.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1553     ptr_cr = s->new_picture.f.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1554
1555     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
1556         uint8_t *ebuf= s->edge_emu_buffer + 32;
1557         s->dsp.emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
1558         ptr_y= ebuf;
1559         s->dsp.emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1560         ptr_cb= ebuf+18*wrap_y;
1561         s->dsp.emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1562         ptr_cr= ebuf+18*wrap_y+8;
1563     }
1564
1565     if (s->mb_intra) {
1566         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
1567             int progressive_score, interlaced_score;
1568
1569             s->interlaced_dct=0;
1570             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
1571                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
1572
1573             if(progressive_score > 0){
1574                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
1575                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
1576                 if(progressive_score > interlaced_score){
1577                     s->interlaced_dct=1;
1578
1579                     dct_offset= wrap_y;
1580                     wrap_y<<=1;
1581                     if (s->chroma_format == CHROMA_422)
1582                         wrap_c<<=1;
1583                 }
1584             }
1585         }
1586
1587         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
1588         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
1589         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
1590         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1591
1592         if(s->flags&CODEC_FLAG_GRAY){
1593             skip_dct[4]= 1;
1594             skip_dct[5]= 1;
1595         }else{
1596             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1597             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1598             if(!s->chroma_y_shift){ /* 422 */
1599                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
1600                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
1601             }
1602         }
1603     }else{
1604         op_pixels_func (*op_pix)[4];
1605         qpel_mc_func (*op_qpix)[16];
1606         uint8_t *dest_y, *dest_cb, *dest_cr;
1607
1608         dest_y  = s->dest[0];
1609         dest_cb = s->dest[1];
1610         dest_cr = s->dest[2];
1611
1612         if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
1613             op_pix = s->dsp.put_pixels_tab;
1614             op_qpix= s->dsp.put_qpel_pixels_tab;
1615         }else{
1616             op_pix = s->dsp.put_no_rnd_pixels_tab;
1617             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
1618         }
1619
1620         if (s->mv_dir & MV_DIR_FORWARD) {
1621             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data, op_pix, op_qpix);
1622             op_pix = s->dsp.avg_pixels_tab;
1623             op_qpix= s->dsp.avg_qpel_pixels_tab;
1624         }
1625         if (s->mv_dir & MV_DIR_BACKWARD) {
1626             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data, op_pix, op_qpix);
1627         }
1628
1629         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
1630             int progressive_score, interlaced_score;
1631
1632             s->interlaced_dct=0;
1633             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
1634                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
1635
1636             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
1637
1638             if(progressive_score>0){
1639                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
1640                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
1641
1642                 if(progressive_score > interlaced_score){
1643                     s->interlaced_dct=1;
1644
1645                     dct_offset= wrap_y;
1646                     wrap_y<<=1;
1647                     if (s->chroma_format == CHROMA_422)
1648                         wrap_c<<=1;
1649                 }
1650             }
1651         }
1652
1653         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
1654         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
1655         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
1656         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
1657
1658         if(s->flags&CODEC_FLAG_GRAY){
1659             skip_dct[4]= 1;
1660             skip_dct[5]= 1;
1661         }else{
1662             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1663             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1664             if(!s->chroma_y_shift){ /* 422 */
1665                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
1666                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
1667             }
1668         }
1669         /* pre quantization */
1670         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
1671             //FIXME optimize
1672             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
1673             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
1674             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
1675             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
1676             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
1677             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
1678             if(!s->chroma_y_shift){ /* 422 */
1679                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
1680                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
1681             }
1682         }
1683     }
1684
1685     if(s->avctx->quantizer_noise_shaping){
1686         if(!skip_dct[0]) get_visual_weight(weight[0], ptr_y                 , wrap_y);
1687         if(!skip_dct[1]) get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1688         if(!skip_dct[2]) get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1689         if(!skip_dct[3]) get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1690         if(!skip_dct[4]) get_visual_weight(weight[4], ptr_cb                , wrap_c);
1691         if(!skip_dct[5]) get_visual_weight(weight[5], ptr_cr                , wrap_c);
1692         if(!s->chroma_y_shift){ /* 422 */
1693             if(!skip_dct[6]) get_visual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
1694             if(!skip_dct[7]) get_visual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
1695         }
1696         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
1697     }
1698
1699     /* DCT & quantize */
1700     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
1701     {
1702         for(i=0;i<mb_block_count;i++) {
1703             if(!skip_dct[i]){
1704                 int overflow;
1705                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1706             // FIXME we could decide to change to quantizer instead of clipping
1707             // JS: I don't think that would be a good idea it could lower quality instead
1708             //     of improve it. Just INTRADC clipping deserves changes in quantizer
1709                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
1710             }else
1711                 s->block_last_index[i]= -1;
1712         }
1713         if(s->avctx->quantizer_noise_shaping){
1714             for(i=0;i<mb_block_count;i++) {
1715                 if(!skip_dct[i]){
1716                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
1717                 }
1718             }
1719         }
1720
1721         if(s->luma_elim_threshold && !s->mb_intra)
1722             for(i=0; i<4; i++)
1723                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1724         if(s->chroma_elim_threshold && !s->mb_intra)
1725             for(i=4; i<mb_block_count; i++)
1726                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1727
1728         if(s->flags & CODEC_FLAG_CBP_RD){
1729             for(i=0;i<mb_block_count;i++) {
1730                 if(s->block_last_index[i] == -1)
1731                     s->coded_score[i]= INT_MAX/256;
1732             }
1733         }
1734     }
1735
1736     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
1737         s->block_last_index[4]=
1738         s->block_last_index[5]= 0;
1739         s->block[4][0]=
1740         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
1741     }
1742
1743     //non c quantize code returns incorrect block_last_index FIXME
1744     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
1745         for(i=0; i<mb_block_count; i++){
1746             int j;
1747             if(s->block_last_index[i]>0){
1748                 for(j=63; j>0; j--){
1749                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
1750                 }
1751                 s->block_last_index[i]= j;
1752             }
1753         }
1754     }
1755
1756     /* huffman encode */
1757     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
1758     case CODEC_ID_MPEG1VIDEO:
1759     case CODEC_ID_MPEG2VIDEO:
1760         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1761             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1762         break;
1763     case CODEC_ID_MPEG4:
1764         if (CONFIG_MPEG4_ENCODER)
1765             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
1766         break;
1767     case CODEC_ID_MSMPEG4V2:
1768     case CODEC_ID_MSMPEG4V3:
1769     case CODEC_ID_WMV1:
1770         if (CONFIG_MSMPEG4_ENCODER)
1771             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
1772         break;
1773     case CODEC_ID_WMV2:
1774         if (CONFIG_WMV2_ENCODER)
1775             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
1776         break;
1777     case CODEC_ID_H261:
1778         if (CONFIG_H261_ENCODER)
1779             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
1780         break;
1781     case CODEC_ID_H263:
1782     case CODEC_ID_H263P:
1783     case CODEC_ID_FLV1:
1784     case CODEC_ID_RV10:
1785     case CODEC_ID_RV20:
1786         if (CONFIG_H263_ENCODER)
1787             h263_encode_mb(s, s->block, motion_x, motion_y);
1788         break;
1789     case CODEC_ID_MJPEG:
1790         if (CONFIG_MJPEG_ENCODER)
1791             ff_mjpeg_encode_mb(s, s->block);
1792         break;
1793     default:
1794         assert(0);
1795     }
1796 }
1797
1798 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
1799 {
1800     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
1801     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
1802 }
1803
1804 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
1805     int i;
1806
1807     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
1808
1809     /* mpeg1 */
1810     d->mb_skip_run= s->mb_skip_run;
1811     for(i=0; i<3; i++)
1812         d->last_dc[i]= s->last_dc[i];
1813
1814     /* statistics */
1815     d->mv_bits= s->mv_bits;
1816     d->i_tex_bits= s->i_tex_bits;
1817     d->p_tex_bits= s->p_tex_bits;
1818     d->i_count= s->i_count;
1819     d->f_count= s->f_count;
1820     d->b_count= s->b_count;
1821     d->skip_count= s->skip_count;
1822     d->misc_bits= s->misc_bits;
1823     d->last_bits= 0;
1824
1825     d->mb_skipped= 0;
1826     d->qscale= s->qscale;
1827     d->dquant= s->dquant;
1828
1829     d->esc3_level_length= s->esc3_level_length;
1830 }
1831
1832 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
1833     int i;
1834
1835     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
1836     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
1837
1838     /* mpeg1 */
1839     d->mb_skip_run= s->mb_skip_run;
1840     for(i=0; i<3; i++)
1841         d->last_dc[i]= s->last_dc[i];
1842
1843     /* statistics */
1844     d->mv_bits= s->mv_bits;
1845     d->i_tex_bits= s->i_tex_bits;
1846     d->p_tex_bits= s->p_tex_bits;
1847     d->i_count= s->i_count;
1848     d->f_count= s->f_count;
1849     d->b_count= s->b_count;
1850     d->skip_count= s->skip_count;
1851     d->misc_bits= s->misc_bits;
1852
1853     d->mb_intra= s->mb_intra;
1854     d->mb_skipped= s->mb_skipped;
1855     d->mv_type= s->mv_type;
1856     d->mv_dir= s->mv_dir;
1857     d->pb= s->pb;
1858     if(s->data_partitioning){
1859         d->pb2= s->pb2;
1860         d->tex_pb= s->tex_pb;
1861     }
1862     d->block= s->block;
1863     for(i=0; i<8; i++)
1864         d->block_last_index[i]= s->block_last_index[i];
1865     d->interlaced_dct= s->interlaced_dct;
1866     d->qscale= s->qscale;
1867
1868     d->esc3_level_length= s->esc3_level_length;
1869 }
1870
1871 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
1872                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
1873                            int *dmin, int *next_block, int motion_x, int motion_y)
1874 {
1875     int score;
1876     uint8_t *dest_backup[3];
1877
1878     copy_context_before_encode(s, backup, type);
1879
1880     s->block= s->blocks[*next_block];
1881     s->pb= pb[*next_block];
1882     if(s->data_partitioning){
1883         s->pb2   = pb2   [*next_block];
1884         s->tex_pb= tex_pb[*next_block];
1885     }
1886
1887     if(*next_block){
1888         memcpy(dest_backup, s->dest, sizeof(s->dest));
1889         s->dest[0] = s->rd_scratchpad;
1890         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
1891         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
1892         assert(s->linesize >= 32); //FIXME
1893     }
1894
1895     encode_mb(s, motion_x, motion_y);
1896
1897     score= put_bits_count(&s->pb);
1898     if(s->data_partitioning){
1899         score+= put_bits_count(&s->pb2);
1900         score+= put_bits_count(&s->tex_pb);
1901     }
1902
1903     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
1904         MPV_decode_mb(s, s->block);
1905
1906         score *= s->lambda2;
1907         score += sse_mb(s) << FF_LAMBDA_SHIFT;
1908     }
1909
1910     if(*next_block){
1911         memcpy(s->dest, dest_backup, sizeof(s->dest));
1912     }
1913
1914     if(score<*dmin){
1915         *dmin= score;
1916         *next_block^=1;
1917
1918         copy_context_after_encode(best, s, type);
1919     }
1920 }
1921
1922 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
1923     uint32_t *sq = ff_squareTbl + 256;
1924     int acc=0;
1925     int x,y;
1926
1927     if(w==16 && h==16)
1928         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
1929     else if(w==8 && h==8)
1930         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
1931
1932     for(y=0; y<h; y++){
1933         for(x=0; x<w; x++){
1934             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
1935         }
1936     }
1937
1938     assert(acc>=0);
1939
1940     return acc;
1941 }
1942
1943 static int sse_mb(MpegEncContext *s){
1944     int w= 16;
1945     int h= 16;
1946
1947     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
1948     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
1949
1950     if(w==16 && h==16)
1951       if(s->avctx->mb_cmp == FF_CMP_NSSE){
1952         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
1953                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
1954                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
1955       }else{
1956         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
1957                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
1958                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
1959       }
1960     else
1961         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
1962                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
1963                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
1964 }
1965
1966 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
1967     MpegEncContext *s= *(void**)arg;
1968
1969
1970     s->me.pre_pass=1;
1971     s->me.dia_size= s->avctx->pre_dia_size;
1972     s->first_slice_line=1;
1973     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
1974         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
1975             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
1976         }
1977         s->first_slice_line=0;
1978     }
1979
1980     s->me.pre_pass=0;
1981
1982     return 0;
1983 }
1984
1985 static int estimate_motion_thread(AVCodecContext *c, void *arg){
1986     MpegEncContext *s= *(void**)arg;
1987
1988     ff_check_alignment();
1989
1990     s->me.dia_size= s->avctx->dia_size;
1991     s->first_slice_line=1;
1992     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
1993         s->mb_x=0; //for block init below
1994         ff_init_block_index(s);
1995         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
1996             s->block_index[0]+=2;
1997             s->block_index[1]+=2;
1998             s->block_index[2]+=2;
1999             s->block_index[3]+=2;
2000
2001             /* compute motion vector & mb_type and store in context */
2002             if(s->pict_type==AV_PICTURE_TYPE_B)
2003                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2004             else
2005                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2006         }
2007         s->first_slice_line=0;
2008     }
2009     return 0;
2010 }
2011
2012 static int mb_var_thread(AVCodecContext *c, void *arg){
2013     MpegEncContext *s= *(void**)arg;
2014     int mb_x, mb_y;
2015
2016     ff_check_alignment();
2017
2018     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2019         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2020             int xx = mb_x * 16;
2021             int yy = mb_y * 16;
2022             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2023             int varc;
2024             int sum = s->dsp.pix_sum(pix, s->linesize);
2025
2026             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2027
2028             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2029             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2030             s->me.mb_var_sum_temp    += varc;
2031         }
2032     }
2033     return 0;
2034 }
2035
2036 static void write_slice_end(MpegEncContext *s){
2037     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2038         if(s->partitioned_frame){
2039             ff_mpeg4_merge_partitions(s);
2040         }
2041
2042         ff_mpeg4_stuffing(&s->pb);
2043     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2044         ff_mjpeg_encode_stuffing(&s->pb);
2045     }
2046
2047     align_put_bits(&s->pb);
2048     flush_put_bits(&s->pb);
2049
2050     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2051         s->misc_bits+= get_bits_diff(s);
2052 }
2053
2054 static int encode_thread(AVCodecContext *c, void *arg){
2055     MpegEncContext *s= *(void**)arg;
2056     int mb_x, mb_y, pdif = 0;
2057     int chr_h= 16>>s->chroma_y_shift;
2058     int i, j;
2059     MpegEncContext best_s, backup_s;
2060     uint8_t bit_buf[2][MAX_MB_BYTES];
2061     uint8_t bit_buf2[2][MAX_MB_BYTES];
2062     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2063     PutBitContext pb[2], pb2[2], tex_pb[2];
2064 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2065
2066     ff_check_alignment();
2067
2068     for(i=0; i<2; i++){
2069         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2070         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2071         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2072     }
2073
2074     s->last_bits= put_bits_count(&s->pb);
2075     s->mv_bits=0;
2076     s->misc_bits=0;
2077     s->i_tex_bits=0;
2078     s->p_tex_bits=0;
2079     s->i_count=0;
2080     s->f_count=0;
2081     s->b_count=0;
2082     s->skip_count=0;
2083
2084     for(i=0; i<3; i++){
2085         /* init last dc values */
2086         /* note: quant matrix value (8) is implied here */
2087         s->last_dc[i] = 128 << s->intra_dc_precision;
2088
2089         s->current_picture.f.error[i] = 0;
2090     }
2091     s->mb_skip_run = 0;
2092     memset(s->last_mv, 0, sizeof(s->last_mv));
2093
2094     s->last_mv_dir = 0;
2095
2096     switch(s->codec_id){
2097     case CODEC_ID_H263:
2098     case CODEC_ID_H263P:
2099     case CODEC_ID_FLV1:
2100         if (CONFIG_H263_ENCODER)
2101             s->gob_index = ff_h263_get_gob_height(s);
2102         break;
2103     case CODEC_ID_MPEG4:
2104         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2105             ff_mpeg4_init_partitions(s);
2106         break;
2107     }
2108
2109     s->resync_mb_x=0;
2110     s->resync_mb_y=0;
2111     s->first_slice_line = 1;
2112     s->ptr_lastgob = s->pb.buf;
2113     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2114 //    printf("row %d at %X\n", s->mb_y, (int)s);
2115         s->mb_x=0;
2116         s->mb_y= mb_y;
2117
2118         ff_set_qscale(s, s->qscale);
2119         ff_init_block_index(s);
2120
2121         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2122             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2123             int mb_type= s->mb_type[xy];
2124 //            int d;
2125             int dmin= INT_MAX;
2126             int dir;
2127
2128             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2129                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2130                 return -1;
2131             }
2132             if(s->data_partitioning){
2133                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2134                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2135                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2136                     return -1;
2137                 }
2138             }
2139
2140             s->mb_x = mb_x;
2141             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2142             ff_update_block_index(s);
2143
2144             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2145                 ff_h261_reorder_mb_index(s);
2146                 xy= s->mb_y*s->mb_stride + s->mb_x;
2147                 mb_type= s->mb_type[xy];
2148             }
2149
2150             /* write gob / video packet header  */
2151             if(s->rtp_mode){
2152                 int current_packet_size, is_gob_start;
2153
2154                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2155
2156                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2157
2158                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2159
2160                 switch(s->codec_id){
2161                 case CODEC_ID_H263:
2162                 case CODEC_ID_H263P:
2163                     if(!s->h263_slice_structured)
2164                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2165                     break;
2166                 case CODEC_ID_MPEG2VIDEO:
2167                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2168                 case CODEC_ID_MPEG1VIDEO:
2169                     if(s->mb_skip_run) is_gob_start=0;
2170                     break;
2171                 }
2172
2173                 if(is_gob_start){
2174                     if(s->start_mb_y != mb_y || mb_x!=0){
2175                         write_slice_end(s);
2176
2177                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2178                             ff_mpeg4_init_partitions(s);
2179                         }
2180                     }
2181
2182                     assert((put_bits_count(&s->pb)&7) == 0);
2183                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2184
2185                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2186                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2187                         int d= 100 / s->avctx->error_rate;
2188                         if(r % d == 0){
2189                             current_packet_size=0;
2190                             s->pb.buf_ptr= s->ptr_lastgob;
2191                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2192                         }
2193                     }
2194
2195                     if (s->avctx->rtp_callback){
2196                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2197                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2198                     }
2199
2200                     switch(s->codec_id){
2201                     case CODEC_ID_MPEG4:
2202                         if (CONFIG_MPEG4_ENCODER) {
2203                             ff_mpeg4_encode_video_packet_header(s);
2204                             ff_mpeg4_clean_buffers(s);
2205                         }
2206                     break;
2207                     case CODEC_ID_MPEG1VIDEO:
2208                     case CODEC_ID_MPEG2VIDEO:
2209                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2210                             ff_mpeg1_encode_slice_header(s);
2211                             ff_mpeg1_clean_buffers(s);
2212                         }
2213                     break;
2214                     case CODEC_ID_H263:
2215                     case CODEC_ID_H263P:
2216                         if (CONFIG_H263_ENCODER)
2217                             h263_encode_gob_header(s, mb_y);
2218                     break;
2219                     }
2220
2221                     if(s->flags&CODEC_FLAG_PASS1){
2222                         int bits= put_bits_count(&s->pb);
2223                         s->misc_bits+= bits - s->last_bits;
2224                         s->last_bits= bits;
2225                     }
2226
2227                     s->ptr_lastgob += current_packet_size;
2228                     s->first_slice_line=1;
2229                     s->resync_mb_x=mb_x;
2230                     s->resync_mb_y=mb_y;
2231                 }
2232             }
2233
2234             if(  (s->resync_mb_x   == s->mb_x)
2235                && s->resync_mb_y+1 == s->mb_y){
2236                 s->first_slice_line=0;
2237             }
2238
2239             s->mb_skipped=0;
2240             s->dquant=0; //only for QP_RD
2241
2242             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2243                 int next_block=0;
2244                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2245
2246                 copy_context_before_encode(&backup_s, s, -1);
2247                 backup_s.pb= s->pb;
2248                 best_s.data_partitioning= s->data_partitioning;
2249                 best_s.partitioned_frame= s->partitioned_frame;
2250                 if(s->data_partitioning){
2251                     backup_s.pb2= s->pb2;
2252                     backup_s.tex_pb= s->tex_pb;
2253                 }
2254
2255                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2256                     s->mv_dir = MV_DIR_FORWARD;
2257                     s->mv_type = MV_TYPE_16X16;
2258                     s->mb_intra= 0;
2259                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2260                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2261                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2262                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2263                 }
2264                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2265                     s->mv_dir = MV_DIR_FORWARD;
2266                     s->mv_type = MV_TYPE_FIELD;
2267                     s->mb_intra= 0;
2268                     for(i=0; i<2; i++){
2269                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2270                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2271                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2272                     }
2273                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2274                                  &dmin, &next_block, 0, 0);
2275                 }
2276                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2277                     s->mv_dir = MV_DIR_FORWARD;
2278                     s->mv_type = MV_TYPE_16X16;
2279                     s->mb_intra= 0;
2280                     s->mv[0][0][0] = 0;
2281                     s->mv[0][0][1] = 0;
2282                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2283                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2284                 }
2285                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2286                     s->mv_dir = MV_DIR_FORWARD;
2287                     s->mv_type = MV_TYPE_8X8;
2288                     s->mb_intra= 0;
2289                     for(i=0; i<4; i++){
2290                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2291                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2292                     }
2293                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2294                                  &dmin, &next_block, 0, 0);
2295                 }
2296                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2297                     s->mv_dir = MV_DIR_FORWARD;
2298                     s->mv_type = MV_TYPE_16X16;
2299                     s->mb_intra= 0;
2300                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2301                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2302                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2303                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2304                 }
2305                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2306                     s->mv_dir = MV_DIR_BACKWARD;
2307                     s->mv_type = MV_TYPE_16X16;
2308                     s->mb_intra= 0;
2309                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2310                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2311                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2312                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2313                 }
2314                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2315                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2316                     s->mv_type = MV_TYPE_16X16;
2317                     s->mb_intra= 0;
2318                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2319                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2320                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2321                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2322                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2323                                  &dmin, &next_block, 0, 0);
2324                 }
2325                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2326                     s->mv_dir = MV_DIR_FORWARD;
2327                     s->mv_type = MV_TYPE_FIELD;
2328                     s->mb_intra= 0;
2329                     for(i=0; i<2; i++){
2330                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2331                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2332                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2333                     }
2334                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2335                                  &dmin, &next_block, 0, 0);
2336                 }
2337                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2338                     s->mv_dir = MV_DIR_BACKWARD;
2339                     s->mv_type = MV_TYPE_FIELD;
2340                     s->mb_intra= 0;
2341                     for(i=0; i<2; i++){
2342                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2343                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2344                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2345                     }
2346                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2347                                  &dmin, &next_block, 0, 0);
2348                 }
2349                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2350                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2351                     s->mv_type = MV_TYPE_FIELD;
2352                     s->mb_intra= 0;
2353                     for(dir=0; dir<2; dir++){
2354                         for(i=0; i<2; i++){
2355                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2356                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2357                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2358                         }
2359                     }
2360                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2361                                  &dmin, &next_block, 0, 0);
2362                 }
2363                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2364                     s->mv_dir = 0;
2365                     s->mv_type = MV_TYPE_16X16;
2366                     s->mb_intra= 1;
2367                     s->mv[0][0][0] = 0;
2368                     s->mv[0][0][1] = 0;
2369                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2370                                  &dmin, &next_block, 0, 0);
2371                     if(s->h263_pred || s->h263_aic){
2372                         if(best_s.mb_intra)
2373                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2374                         else
2375                             ff_clean_intra_table_entries(s); //old mode?
2376                     }
2377                 }
2378
2379                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2380                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2381                         const int last_qp= backup_s.qscale;
2382                         int qpi, qp, dc[6];
2383                         DCTELEM ac[6][16];
2384                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2385                         static const int dquant_tab[4]={-1,1,-2,2};
2386
2387                         assert(backup_s.dquant == 0);
2388
2389                         //FIXME intra
2390                         s->mv_dir= best_s.mv_dir;
2391                         s->mv_type = MV_TYPE_16X16;
2392                         s->mb_intra= best_s.mb_intra;
2393                         s->mv[0][0][0] = best_s.mv[0][0][0];
2394                         s->mv[0][0][1] = best_s.mv[0][0][1];
2395                         s->mv[1][0][0] = best_s.mv[1][0][0];
2396                         s->mv[1][0][1] = best_s.mv[1][0][1];
2397
2398                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2399                         for(; qpi<4; qpi++){
2400                             int dquant= dquant_tab[qpi];
2401                             qp= last_qp + dquant;
2402                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2403                                 continue;
2404                             backup_s.dquant= dquant;
2405                             if(s->mb_intra && s->dc_val[0]){
2406                                 for(i=0; i<6; i++){
2407                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2408                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2409                                 }
2410                             }
2411
2412                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2413                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2414                             if(best_s.qscale != qp){
2415                                 if(s->mb_intra && s->dc_val[0]){
2416                                     for(i=0; i<6; i++){
2417                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2418                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2419                                     }
2420                                 }
2421                             }
2422                         }
2423                     }
2424                 }
2425                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2426                     int mx= s->b_direct_mv_table[xy][0];
2427                     int my= s->b_direct_mv_table[xy][1];
2428
2429                     backup_s.dquant = 0;
2430                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2431                     s->mb_intra= 0;
2432                     ff_mpeg4_set_direct_mv(s, mx, my);
2433                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2434                                  &dmin, &next_block, mx, my);
2435                 }
2436                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2437                     backup_s.dquant = 0;
2438                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2439                     s->mb_intra= 0;
2440                     ff_mpeg4_set_direct_mv(s, 0, 0);
2441                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2442                                  &dmin, &next_block, 0, 0);
2443                 }
2444                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2445                     int coded=0;
2446                     for(i=0; i<6; i++)
2447                         coded |= s->block_last_index[i];
2448                     if(coded){
2449                         int mx,my;
2450                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2451                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2452                             mx=my=0; //FIXME find the one we actually used
2453                             ff_mpeg4_set_direct_mv(s, mx, my);
2454                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2455                             mx= s->mv[1][0][0];
2456                             my= s->mv[1][0][1];
2457                         }else{
2458                             mx= s->mv[0][0][0];
2459                             my= s->mv[0][0][1];
2460                         }
2461
2462                         s->mv_dir= best_s.mv_dir;
2463                         s->mv_type = best_s.mv_type;
2464                         s->mb_intra= 0;
2465 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2466                         s->mv[0][0][1] = best_s.mv[0][0][1];
2467                         s->mv[1][0][0] = best_s.mv[1][0][0];
2468                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2469                         backup_s.dquant= 0;
2470                         s->skipdct=1;
2471                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2472                                         &dmin, &next_block, mx, my);
2473                         s->skipdct=0;
2474                     }
2475                 }
2476
2477                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2478
2479                 copy_context_after_encode(s, &best_s, -1);
2480
2481                 pb_bits_count= put_bits_count(&s->pb);
2482                 flush_put_bits(&s->pb);
2483                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2484                 s->pb= backup_s.pb;
2485
2486                 if(s->data_partitioning){
2487                     pb2_bits_count= put_bits_count(&s->pb2);
2488                     flush_put_bits(&s->pb2);
2489                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2490                     s->pb2= backup_s.pb2;
2491
2492                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2493                     flush_put_bits(&s->tex_pb);
2494                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2495                     s->tex_pb= backup_s.tex_pb;
2496                 }
2497                 s->last_bits= put_bits_count(&s->pb);
2498
2499                 if (CONFIG_H263_ENCODER &&
2500                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2501                     ff_h263_update_motion_val(s);
2502
2503                 if(next_block==0){ //FIXME 16 vs linesize16
2504                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2505                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2506                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2507                 }
2508
2509                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2510                     MPV_decode_mb(s, s->block);
2511             } else {
2512                 int motion_x = 0, motion_y = 0;
2513                 s->mv_type=MV_TYPE_16X16;
2514                 // only one MB-Type possible
2515
2516                 switch(mb_type){
2517                 case CANDIDATE_MB_TYPE_INTRA:
2518                     s->mv_dir = 0;
2519                     s->mb_intra= 1;
2520                     motion_x= s->mv[0][0][0] = 0;
2521                     motion_y= s->mv[0][0][1] = 0;
2522                     break;
2523                 case CANDIDATE_MB_TYPE_INTER:
2524                     s->mv_dir = MV_DIR_FORWARD;
2525                     s->mb_intra= 0;
2526                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2527                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2528                     break;
2529                 case CANDIDATE_MB_TYPE_INTER_I:
2530                     s->mv_dir = MV_DIR_FORWARD;
2531                     s->mv_type = MV_TYPE_FIELD;
2532                     s->mb_intra= 0;
2533                     for(i=0; i<2; i++){
2534                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2535                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2536                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2537                     }
2538                     break;
2539                 case CANDIDATE_MB_TYPE_INTER4V:
2540                     s->mv_dir = MV_DIR_FORWARD;
2541                     s->mv_type = MV_TYPE_8X8;
2542                     s->mb_intra= 0;
2543                     for(i=0; i<4; i++){
2544                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2545                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2546                     }
2547                     break;
2548                 case CANDIDATE_MB_TYPE_DIRECT:
2549                     if (CONFIG_MPEG4_ENCODER) {
2550                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2551                         s->mb_intra= 0;
2552                         motion_x=s->b_direct_mv_table[xy][0];
2553                         motion_y=s->b_direct_mv_table[xy][1];
2554                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2555                     }
2556                     break;
2557                 case CANDIDATE_MB_TYPE_DIRECT0:
2558                     if (CONFIG_MPEG4_ENCODER) {
2559                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2560                         s->mb_intra= 0;
2561                         ff_mpeg4_set_direct_mv(s, 0, 0);
2562                     }
2563                     break;
2564                 case CANDIDATE_MB_TYPE_BIDIR:
2565                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2566                     s->mb_intra= 0;
2567                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2568                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2569                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2570                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2571                     break;
2572                 case CANDIDATE_MB_TYPE_BACKWARD:
2573                     s->mv_dir = MV_DIR_BACKWARD;
2574                     s->mb_intra= 0;
2575                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2576                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2577                     break;
2578                 case CANDIDATE_MB_TYPE_FORWARD:
2579                     s->mv_dir = MV_DIR_FORWARD;
2580                     s->mb_intra= 0;
2581                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2582                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2583 //                    printf(" %d %d ", motion_x, motion_y);
2584                     break;
2585                 case CANDIDATE_MB_TYPE_FORWARD_I:
2586                     s->mv_dir = MV_DIR_FORWARD;
2587                     s->mv_type = MV_TYPE_FIELD;
2588                     s->mb_intra= 0;
2589                     for(i=0; i<2; i++){
2590                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2591                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2592                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2593                     }
2594                     break;
2595                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2596                     s->mv_dir = MV_DIR_BACKWARD;
2597                     s->mv_type = MV_TYPE_FIELD;
2598                     s->mb_intra= 0;
2599                     for(i=0; i<2; i++){
2600                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2601                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2602                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2603                     }
2604                     break;
2605                 case CANDIDATE_MB_TYPE_BIDIR_I:
2606                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2607                     s->mv_type = MV_TYPE_FIELD;
2608                     s->mb_intra= 0;
2609                     for(dir=0; dir<2; dir++){
2610                         for(i=0; i<2; i++){
2611                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2612                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2613                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2614                         }
2615                     }
2616                     break;
2617                 default:
2618                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2619                 }
2620
2621                 encode_mb(s, motion_x, motion_y);
2622
2623                 // RAL: Update last macroblock type
2624                 s->last_mv_dir = s->mv_dir;
2625
2626                 if (CONFIG_H263_ENCODER &&
2627                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2628                     ff_h263_update_motion_val(s);
2629
2630                 MPV_decode_mb(s, s->block);
2631             }
2632
2633             /* clean the MV table in IPS frames for direct mode in B frames */
2634             if(s->mb_intra /* && I,P,S_TYPE */){
2635                 s->p_mv_table[xy][0]=0;
2636                 s->p_mv_table[xy][1]=0;
2637             }
2638
2639             if(s->flags&CODEC_FLAG_PSNR){
2640                 int w= 16;
2641                 int h= 16;
2642
2643                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2644                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2645
2646                 s->current_picture.f.error[0] += sse(
2647                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2648                     s->dest[0], w, h, s->linesize);
2649                 s->current_picture.f.error[1] += sse(
2650                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2651                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2652                 s->current_picture.f.error[2] += sse(
2653                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2654                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2655             }
2656             if(s->loop_filter){
2657                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2658                     ff_h263_loop_filter(s);
2659             }
2660 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2661         }
2662     }
2663
2664     //not beautiful here but we must write it before flushing so it has to be here
2665     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2666         msmpeg4_encode_ext_header(s);
2667
2668     write_slice_end(s);
2669
2670     /* Send the last GOB if RTP */
2671     if (s->avctx->rtp_callback) {
2672         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2673         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2674         /* Call the RTP callback to send the last GOB */
2675         emms_c();
2676         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2677     }
2678
2679     return 0;
2680 }
2681
2682 #define MERGE(field) dst->field += src->field; src->field=0
2683 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2684     MERGE(me.scene_change_score);
2685     MERGE(me.mc_mb_var_sum_temp);
2686     MERGE(me.mb_var_sum_temp);
2687 }
2688
2689 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2690     int i;
2691
2692     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2693     MERGE(dct_count[1]);
2694     MERGE(mv_bits);
2695     MERGE(i_tex_bits);
2696     MERGE(p_tex_bits);
2697     MERGE(i_count);
2698     MERGE(f_count);
2699     MERGE(b_count);
2700     MERGE(skip_count);
2701     MERGE(misc_bits);
2702     MERGE(error_count);
2703     MERGE(padding_bug_score);
2704     MERGE(current_picture.f.error[0]);
2705     MERGE(current_picture.f.error[1]);
2706     MERGE(current_picture.f.error[2]);
2707
2708     if(dst->avctx->noise_reduction){
2709         for(i=0; i<64; i++){
2710             MERGE(dct_error_sum[0][i]);
2711             MERGE(dct_error_sum[1][i]);
2712         }
2713     }
2714
2715     assert(put_bits_count(&src->pb) % 8 ==0);
2716     assert(put_bits_count(&dst->pb) % 8 ==0);
2717     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2718     flush_put_bits(&dst->pb);
2719 }
2720
2721 static int estimate_qp(MpegEncContext *s, int dry_run){
2722     if (s->next_lambda){
2723         s->current_picture_ptr->f.quality =
2724         s->current_picture.f.quality = s->next_lambda;
2725         if(!dry_run) s->next_lambda= 0;
2726     } else if (!s->fixed_qscale) {
2727         s->current_picture_ptr->f.quality =
2728         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2729         if (s->current_picture.f.quality < 0)
2730             return -1;
2731     }
2732
2733     if(s->adaptive_quant){
2734         switch(s->codec_id){
2735         case CODEC_ID_MPEG4:
2736             if (CONFIG_MPEG4_ENCODER)
2737                 ff_clean_mpeg4_qscales(s);
2738             break;
2739         case CODEC_ID_H263:
2740         case CODEC_ID_H263P:
2741         case CODEC_ID_FLV1:
2742             if (CONFIG_H263_ENCODER)
2743                 ff_clean_h263_qscales(s);
2744             break;
2745         default:
2746             ff_init_qscale_tab(s);
2747         }
2748
2749         s->lambda= s->lambda_table[0];
2750         //FIXME broken
2751     }else
2752         s->lambda = s->current_picture.f.quality;
2753 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
2754     update_qscale(s);
2755     return 0;
2756 }
2757
2758 /* must be called before writing the header */
2759 static void set_frame_distances(MpegEncContext * s){
2760     assert(s->current_picture_ptr->pts != AV_NOPTS_VALUE);
2761     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
2762
2763     if(s->pict_type==AV_PICTURE_TYPE_B){
2764         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
2765         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
2766     }else{
2767         s->pp_time= s->time - s->last_non_b_time;
2768         s->last_non_b_time= s->time;
2769         assert(s->picture_number==0 || s->pp_time > 0);
2770     }
2771 }
2772
2773 static int encode_picture(MpegEncContext *s, int picture_number)
2774 {
2775     int i;
2776     int bits;
2777     int context_count = s->avctx->thread_count;
2778
2779     s->picture_number = picture_number;
2780
2781     /* Reset the average MB variance */
2782     s->me.mb_var_sum_temp    =
2783     s->me.mc_mb_var_sum_temp = 0;
2784
2785     /* we need to initialize some time vars before we can encode b-frames */
2786     // RAL: Condition added for MPEG1VIDEO
2787     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
2788         set_frame_distances(s);
2789     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
2790         ff_set_mpeg4_time(s);
2791
2792     s->me.scene_change_score=0;
2793
2794 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
2795
2796     if(s->pict_type==AV_PICTURE_TYPE_I){
2797         if(s->msmpeg4_version >= 3) s->no_rounding=1;
2798         else                        s->no_rounding=0;
2799     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
2800         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2801             s->no_rounding ^= 1;
2802     }
2803
2804     if(s->flags & CODEC_FLAG_PASS2){
2805         if (estimate_qp(s,1) < 0)
2806             return -1;
2807         ff_get_2pass_fcode(s);
2808     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
2809         if(s->pict_type==AV_PICTURE_TYPE_B)
2810             s->lambda= s->last_lambda_for[s->pict_type];
2811         else
2812             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
2813         update_qscale(s);
2814     }
2815
2816     s->mb_intra=0; //for the rate distortion & bit compare functions
2817     for(i=1; i<context_count; i++){
2818         ff_update_duplicate_context(s->thread_context[i], s);
2819     }
2820
2821     if(ff_init_me(s)<0)
2822         return -1;
2823
2824     /* Estimate motion for every MB */
2825     if(s->pict_type != AV_PICTURE_TYPE_I){
2826         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
2827         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
2828         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
2829             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
2830                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2831             }
2832         }
2833
2834         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2835     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
2836         /* I-Frame */
2837         for(i=0; i<s->mb_stride*s->mb_height; i++)
2838             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
2839
2840         if(!s->fixed_qscale){
2841             /* finding spatial complexity for I-frame rate control */
2842             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2843         }
2844     }
2845     for(i=1; i<context_count; i++){
2846         merge_context_after_me(s, s->thread_context[i]);
2847     }
2848     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
2849     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
2850     emms_c();
2851
2852     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
2853         s->pict_type= AV_PICTURE_TYPE_I;
2854         for(i=0; i<s->mb_stride*s->mb_height; i++)
2855             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
2856 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2857     }
2858
2859     if(!s->umvplus){
2860         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
2861             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
2862
2863             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2864                 int a,b;
2865                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
2866                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
2867                 s->f_code= FFMAX3(s->f_code, a, b);
2868             }
2869
2870             ff_fix_long_p_mvs(s);
2871             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
2872             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2873                 int j;
2874                 for(i=0; i<2; i++){
2875                     for(j=0; j<2; j++)
2876                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
2877                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
2878                 }
2879             }
2880         }
2881
2882         if(s->pict_type==AV_PICTURE_TYPE_B){
2883             int a, b;
2884
2885             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
2886             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
2887             s->f_code = FFMAX(a, b);
2888
2889             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
2890             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
2891             s->b_code = FFMAX(a, b);
2892
2893             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
2894             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
2895             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
2896             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
2897             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2898                 int dir, j;
2899                 for(dir=0; dir<2; dir++){
2900                     for(i=0; i<2; i++){
2901                         for(j=0; j<2; j++){
2902                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
2903                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
2904                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
2905                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
2906                         }
2907                     }
2908                 }
2909             }
2910         }
2911     }
2912
2913     if (estimate_qp(s, 0) < 0)
2914         return -1;
2915
2916     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
2917         s->qscale= 3; //reduce clipping problems
2918
2919     if (s->out_format == FMT_MJPEG) {
2920         /* for mjpeg, we do include qscale in the matrix */
2921         for(i=1;i<64;i++){
2922             int j= s->dsp.idct_permutation[i];
2923
2924             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
2925         }
2926         s->y_dc_scale_table=
2927         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
2928         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
2929         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
2930                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
2931         s->qscale= 8;
2932     }
2933
2934     //FIXME var duplication
2935     s->current_picture_ptr->f.key_frame =
2936     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
2937     s->current_picture_ptr->f.pict_type =
2938     s->current_picture.f.pict_type = s->pict_type;
2939
2940     if (s->current_picture.f.key_frame)
2941         s->picture_in_gop_number=0;
2942
2943     s->last_bits= put_bits_count(&s->pb);
2944     switch(s->out_format) {
2945     case FMT_MJPEG:
2946         if (CONFIG_MJPEG_ENCODER)
2947             ff_mjpeg_encode_picture_header(s);
2948         break;
2949     case FMT_H261:
2950         if (CONFIG_H261_ENCODER)
2951             ff_h261_encode_picture_header(s, picture_number);
2952         break;
2953     case FMT_H263:
2954         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
2955             ff_wmv2_encode_picture_header(s, picture_number);
2956         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
2957             msmpeg4_encode_picture_header(s, picture_number);
2958         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
2959             mpeg4_encode_picture_header(s, picture_number);
2960         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
2961             rv10_encode_picture_header(s, picture_number);
2962         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
2963             rv20_encode_picture_header(s, picture_number);
2964         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
2965             ff_flv_encode_picture_header(s, picture_number);
2966         else if (CONFIG_H263_ENCODER)
2967             h263_encode_picture_header(s, picture_number);
2968         break;
2969     case FMT_MPEG1:
2970         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2971             mpeg1_encode_picture_header(s, picture_number);
2972         break;
2973     case FMT_H264:
2974         break;
2975     default:
2976         assert(0);
2977     }
2978     bits= put_bits_count(&s->pb);
2979     s->header_bits= bits - s->last_bits;
2980
2981     for(i=1; i<context_count; i++){
2982         update_duplicate_context_after_me(s->thread_context[i], s);
2983     }
2984     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2985     for(i=1; i<context_count; i++){
2986         merge_context_after_encode(s, s->thread_context[i]);
2987     }
2988     emms_c();
2989     return 0;
2990 }
2991
2992 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
2993     const int intra= s->mb_intra;
2994     int i;
2995
2996     s->dct_count[intra]++;
2997
2998     for(i=0; i<64; i++){
2999         int level= block[i];
3000
3001         if(level){
3002             if(level>0){
3003                 s->dct_error_sum[intra][i] += level;
3004                 level -= s->dct_offset[intra][i];
3005                 if(level<0) level=0;
3006             }else{
3007                 s->dct_error_sum[intra][i] -= level;
3008                 level += s->dct_offset[intra][i];
3009                 if(level>0) level=0;
3010             }
3011             block[i]= level;
3012         }
3013     }
3014 }
3015
/**
 * Quantize one 8x8 block using a rate-distortion (trellis) search.
 *
 * The block is forward-transformed in place with s->dsp.fdct and, when
 * s->dct_error_sum is set, passed through s->denoise_dct. For every scan
 * position up to the last coefficient that survives plain thresholding, up
 * to two candidate levels are kept (the rounded level and the one next to
 * it towards zero). A dynamic-programming pass over (run, level) pairs then
 * selects the sequence that minimises squared error plus lambda times the
 * VLC length read from the intra/inter length tables.
 *
 * @param s        encoder context; s->coded_score[n] is written once at
 *                 least one coefficient survives thresholding
 * @param block    64 coefficients, overwritten with the chosen levels
 * @param n        block index; n < 4 selects s->y_dc_scale, otherwise
 *                 s->c_dc_scale, for the intra DC coefficient
 * @param qscale   quantizer scale; also indexes q_intra_matrix/q_inter_matrix
 * @param overflow set to nonzero when a level may exceed s->max_qcoeff
 * @return scan index of the last nonzero coefficient; a value below the
 *         first searched index (0 for intra, -1 for inter) when nothing
 *         is kept
 */
3016 static int dct_quantize_trellis_c(MpegEncContext *s,
3017                                   DCTELEM *block, int n,
3018                                   int qscale, int *overflow){
3019     const int *qmat;
3020     const uint8_t *scantable= s->intra_scantable.scantable;
3021     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3022     int max=0;
3023     unsigned int threshold1, threshold2;
3024     int bias=0;
3025     int run_tab[65];
3026     int level_tab[65];
3027     int score_tab[65];
3028     int survivor[65];
3029     int survivor_count;
3030     int last_run=0;
3031     int last_level=0;
3032     int last_score= 0;
3033     int last_i;
3034     int coeff[2][64];
3035     int coeff_count[64];
3036     int qmul, qadd, start_i, last_non_zero, i, dc;
3037     const int esc_length= s->ac_esc_length;
3038     uint8_t * length;
3039     uint8_t * last_length;
3040     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3041
3042     s->dsp.fdct (block);
3043
3044     if(s->dct_error_sum)
3045         s->denoise_dct(s, block);
         /* dequantisation constants for the FMT_H263 path, 8 times the
          * qmul/qadd pair used by dct_quantize_refine() */
3046     qmul= qscale*16;
3047     qadd= ((qscale-1)|1)*8;
3048
3049     if (s->mb_intra) {
3050         int q;
3051         if (!s->h263_aic) {
3052             if (n < 4)
3053                 q = s->y_dc_scale;
3054             else
3055                 q = s->c_dc_scale;
3056             q = q << 3;
3057         } else{
3058             /* For AIC we skip quant/dequant of INTRADC */
3059             q = 1 << 3;
3060             qadd=0;
3061         }
3062
3063         /* note: block[0] is assumed to be positive */
3064         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is handled above, the search covers AC only */
3065         start_i = 1;
3066         last_non_zero = 0;
3067         qmat = s->q_intra_matrix[qscale];
3068         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3069             bias= 1<<(QMAT_SHIFT-1);
3070         length     = s->intra_ac_vlc_length;
3071         last_length= s->intra_ac_vlc_last_length;
3072     } else {
3073         start_i = 0;
3074         last_non_zero = -1;
3075         qmat = s->q_inter_matrix[qscale];
3076         length     = s->inter_ac_vlc_length;
3077         last_length= s->inter_ac_vlc_last_length;
3078     }
3079     last_i= start_i;
3080
         /* |level| > threshold1 is tested with one unsigned compare:
          * (unsigned)(level + threshold1) > 2*threshold1 */
3081     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3082     threshold2= (threshold1<<1);
3083
         /* scan backwards for the last coefficient above the threshold */
3084     for(i=63; i>=start_i; i--) {
3085         const int j = scantable[i];
3086         int level = block[j] * qmat[j];
3087
3088         if(((unsigned)(level+threshold1))>threshold2){
3089             last_non_zero = i;
3090             break;
3091         }
3092     }
3093
         /* collect the candidate levels for every position in the search range */
3094     for(i=start_i; i<=last_non_zero; i++) {
3095         const int j = scantable[i];
3096         int level = block[j] * qmat[j];
3097
3098 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3099 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3100         if(((unsigned)(level+threshold1))>threshold2){
3101             if(level>0){
3102                 level= (bias + level)>>QMAT_SHIFT;
3103                 coeff[0][i]= level;
3104                 coeff[1][i]= level-1;
3105 //                coeff[2][k]= level-2;
3106             }else{
3107                 level= (bias - level)>>QMAT_SHIFT;
3108                 coeff[0][i]= -level;
3109                 coeff[1][i]= -level+1;
3110 //                coeff[2][k]= -level+2;
3111             }
                 /* a magnitude of 1 has no nonzero neighbour towards zero */
3112             coeff_count[i]= FFMIN(level, 2);
3113             assert(coeff_count[i]);
3114             max |=level;
3115         }else{
                 /* below threshold: single candidate of magnitude 1 carrying the
                  * sign of level (assumes an arithmetic right shift of negative int) */
3116             coeff[0][i]= (level>>31)|1;
3117             coeff_count[i]= 1;
3118         }
3119     }
3120
3121     *overflow= s->max_qcoeff < max; //overflow might have happened
3122
3123     if(last_non_zero < start_i){
3124         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3125         return last_non_zero;
3126     }
3127
         /* score_tab[i] holds the best score found for coding everything before
          * scan position i; survivor[] lists the positions a new run may start from */
3128     score_tab[start_i]= 0;
3129     survivor[0]= start_i;
3130     survivor_count= 1;
3131
3132     for(i=start_i; i<=last_non_zero; i++){
3133         int level_index, j, zero_distortion;
3134         int dct_coeff= FFABS(block[ scantable[i] ]);
3135         int best_score=256*256*256*120;
3136
3137         if (   s->dsp.fdct == fdct_ifast
3138 #ifndef FAAN_POSTSCALE
3139             || s->dsp.fdct == ff_faandct
3140 #endif
3141            )
3142             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3143         zero_distortion= dct_coeff*dct_coeff;
3144
3145         for(level_index=0; level_index < coeff_count[i]; level_index++){
3146             int distortion;
3147             int level= coeff[level_index][i];
3148             const int alevel= FFABS(level);
3149             int unquant_coeff;
3150
3151             assert(level);
3152
3153             if(s->out_format == FMT_H263){
3154                 unquant_coeff= alevel*qmul + qadd;
3155             }else{ //MPEG1
3156                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3157                 if(s->mb_intra){
3158                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3159                         unquant_coeff =   (unquant_coeff - 1) | 1;
3160                 }else{
3161                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3162                         unquant_coeff =   (unquant_coeff - 1) | 1;
3163                 }
3164                 unquant_coeff<<= 3;
3165             }
3166
                 /* distortion is measured relative to coding this coefficient as zero */
3167             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias the level by 64 for UNI_AC_ENC_INDEX; anything outside
                  * 0..127 is costed with esc_length instead of the length tables */
3168             level+=64;
3169             if((level&(~127)) == 0){
3170                 for(j=survivor_count-1; j>=0; j--){
3171                     int run= i - survivor[j];
3172                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3173                     score += score_tab[i-run];
3174
3175                     if(score < best_score){
3176                         best_score= score;
3177                         run_tab[i+1]= run;
3178                         level_tab[i+1]= level-64;
3179                     }
3180                 }
3181
                     /* FMT_H263 uses a separate length table for the final
                      * coefficient, so the best terminating choice is tracked here */
3182                 if(s->out_format == FMT_H263){
3183                     for(j=survivor_count-1; j>=0; j--){
3184                         int run= i - survivor[j];
3185                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3186                         score += score_tab[i-run];
3187                         if(score < last_score){
3188                             last_score= score;
3189                             last_run= run;
3190                             last_level= level-64;
3191                             last_i= i+1;
3192                         }
3193                     }
3194                 }
3195             }else{
3196                 distortion += esc_length*lambda;
3197                 for(j=survivor_count-1; j>=0; j--){
3198                     int run= i - survivor[j];
3199                     int score= distortion + score_tab[i-run];
3200
3201                     if(score < best_score){
3202                         best_score= score;
3203                         run_tab[i+1]= run;
3204                         level_tab[i+1]= level-64;
3205                     }
3206                 }
3207
3208                 if(s->out_format == FMT_H263){
3209                   for(j=survivor_count-1; j>=0; j--){
3210                         int run= i - survivor[j];
3211                         int score= distortion + score_tab[i-run];
3212                         if(score < last_score){
3213                             last_score= score;
3214                             last_run= run;
3215                             last_level= level-64;
3216                             last_i= i+1;
3217                         }
3218                     }
3219                 }
3220             }
3221         }
3222
3223         score_tab[i+1]= best_score;
3224
             /* drop trailing survivors whose score is worse than the new best */
3225         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3226         if(last_non_zero <= 27){
3227             for(; survivor_count; survivor_count--){
3228                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3229                     break;
3230             }
3231         }else{
3232             for(; survivor_count; survivor_count--){
3233                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3234                     break;
3235             }
3236         }
3237
3238         survivor[ survivor_count++ ]= i+1;
3239     }
3240
         /* formats other than FMT_H263 choose the end position afterwards; every
          * end position except 0 is charged an extra 2*lambda */
3241     if(s->out_format != FMT_H263){
3242         last_score= 256*256*256*120;
3243         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3244             int score= score_tab[i];
3245             if(i) score += lambda*2; //FIXME exacter?
3246
3247             if(score < last_score){
3248                 last_score= score;
3249                 last_i= i;
3250                 last_level= level_tab[i];
3251                 last_run= run_tab[i];
3252             }
3253         }
3254     }
3255
3256     s->coded_score[n] = last_score;
3257
         /* remember |block[0]| before the coefficients are cleared and rebuilt */
3258     dc= FFABS(block[0]);
3259     last_non_zero= last_i - 1;
3260     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3261
3262     if(last_non_zero < start_i)
3263         return last_non_zero;
3264
         /* only the first coefficient of a non-intra block remains: decide its
          * level again, including the option of dropping it */
3265     if(last_non_zero == 0 && start_i == 0){
3266         int best_level= 0;
3267         int best_score= dc * dc;
3268
3269         for(i=0; i<coeff_count[0]; i++){
3270             int level= coeff[i][0];
3271             int alevel= FFABS(level);
3272             int unquant_coeff, score, distortion;
3273
3274             if(s->out_format == FMT_H263){
3275                     unquant_coeff= (alevel*qmul + qadd)>>3;
3276             }else{ //MPEG1
3277                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3278                     unquant_coeff =   (unquant_coeff - 1) | 1;
3279             }
3280             unquant_coeff = (unquant_coeff + 4) >> 3;
3281             unquant_coeff<<= 3 + 3;
3282
3283             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3284             level+=64;
3285             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3286             else                    score= distortion + esc_length*lambda;
3287
3288             if(score < best_score){
3289                 best_score= score;
3290                 best_level= level - 64;
3291             }
3292         }
3293         block[0]= best_level;
3294         s->coded_score[n] = best_score - dc*dc;
3295         if(best_level == 0) return -1;
3296         else                return last_non_zero;
3297     }
3298
         /* walk back through run_tab/level_tab and store the chosen levels at
          * their permuted positions */
3299     i= last_i;
3300     assert(last_level);
3301
3302     block[ perm_scantable[last_non_zero] ]= last_level;
3303     i -= last_run + 1;
3304
3305     for(; i>start_i; i -= run_tab[i] + 1){
3306         block[ perm_scantable[i-1] ]= level_tab[i];
3307     }
3308
3309     return last_non_zero;
3310 }
3311
3312 //#define REFINE_STATS 1
3313 static int16_t basis[64][64];
3314
3315 static void build_basis(uint8_t *perm){
3316     int i, j, x, y;
3317     emms_c();
3318     for(i=0; i<8; i++){
3319         for(j=0; j<8; j++){
3320             for(y=0; y<8; y++){
3321                 for(x=0; x<8; x++){
3322                     double s= 0.25*(1<<BASIS_SHIFT);
3323                     int index= 8*i + j;
3324                     int perm_index= perm[index];
3325                     if(i==0) s*= sqrt(0.5);
3326                     if(j==0) s*= sqrt(0.5);
3327                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3328                 }
3329             }
3330         }
3331     }
3332 }
3333
/**
 * Iteratively refine an already quantized block.
 *
 * rem[] is initialised from orig[] and the reconstruction of the current
 * levels (one basis[] function per nonzero coefficient). Each iteration then
 * evaluates changing a coefficient by +1 or -1: the candidate is scored with
 * s->dsp.try_8x8basis() on the weighted remainder plus lambda times the
 * change in VLC length. The best improving change is applied to block[] and
 * rem[], and the search repeats until no candidate lowers the score.
 * Which candidates are tried depends on s->avctx->quantizer_noise_shaping.
 *
 * @param s      encoder context; block_last_index[n], the scan tables and
 *               the VLC length tables are read
 * @param block  quantized levels, modified in place
 * @param weight per-sample weights; overwritten with values in 16..63
 * @param orig   samples the reconstruction is compared against
 *               (presumably the source samples of the block -- confirm with caller)
 * @param n      block index; n < 4 selects s->y_dc_scale for intra DC
 * @param qscale quantizer scale
 * @return scan index of the last nonzero coefficient after refinement
 */
3334 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3335                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3336                         int n, int qscale){
3337     int16_t rem[64];
3338     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3339     const uint8_t *scantable= s->intra_scantable.scantable;
3340     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3341 //    unsigned int threshold1, threshold2;
3342 //    int bias=0;
3343     int run_tab[65];
3344     int prev_run=0;
3345     int prev_level=0;
3346     int qmul, qadd, start_i, last_non_zero, i, dc;
3347     uint8_t * length;
3348     uint8_t * last_length;
3349     int lambda;
3350     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3351 #ifdef REFINE_STATS
3352 static int count=0;
3353 static int after_last=0;
3354 static int to_zero=0;
3355 static int from_zero=0;
3356 static int raise=0;
3357 static int lower=0;
3358 static int messed_sign=0;
3359 #endif
3360
         /* the basis table is built lazily on the first call */
3361     if(basis[0][0] == 0)
3362         build_basis(s->dsp.idct_permutation);
3363
3364     qmul= qscale*2;
3365     qadd= (qscale-1)|1;
3366     if (s->mb_intra) {
3367         if (!s->h263_aic) {
3368             if (n < 4)
3369                 q = s->y_dc_scale;
3370             else
3371                 q = s->c_dc_scale;
3372         } else{
3373             /* For AIC we skip quant/dequant of INTRADC */
3374             q = 1;
3375             qadd=0;
3376         }
3377         q <<= RECON_SHIFT-3;
3378         /* note: block[0] is assumed to be positive */
3379         dc= block[0]*q;
3380 //        block[0] = (block[0] + (q >> 1)) / q;
3381         start_i = 1;
3382 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3383 //            bias= 1<<(QMAT_SHIFT-1);
3384         length     = s->intra_ac_vlc_length;
3385         last_length= s->intra_ac_vlc_last_length;
3386     } else {
3387         dc= 0;
3388         start_i = 0;
3389         length     = s->inter_ac_vlc_length;
3390         last_length= s->inter_ac_vlc_last_length;
3391     }
3392     last_non_zero = s->block_last_index[n];
3393
3394 #ifdef REFINE_STATS
3395 {START_TIMER
3396 #endif
         /* start rem[] from the DC term (plus a rounding offset) minus the
          * reference samples scaled by 1<<RECON_SHIFT */
3397     dc += (1<<(RECON_SHIFT-1));
3398     for(i=0; i<64; i++){
3399         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
3400     }
3401 #ifdef REFINE_STATS
3402 STOP_TIMER("memset rem[]")}
3403 #endif
         /* remap the weights into 16..63 and derive lambda from the sum of
          * their squares and s->lambda2 */
3404     sum=0;
3405     for(i=0; i<64; i++){
3406         int one= 36;
3407         int qns=4;
3408         int w;
3409
3410         w= FFABS(weight[i]) + qns*one;
3411         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3412
3413         weight[i] = w;
3414 //        w=weight[i] = (63*qns + (w/2)) / w;
3415
3416         assert(w>0);
3417         assert(w<(1<<6));
3418         sum += w*w;
3419     }
3420     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3421 #ifdef REFINE_STATS
3422 {START_TIMER
3423 #endif
         /* add the contribution of every nonzero level to rem[] and record the
          * zero-run preceding each of them in run_tab[] */
3424     run=0;
3425     rle_index=0;
3426     for(i=start_i; i<=last_non_zero; i++){
3427         int j= perm_scantable[i];
3428         const int level= block[j];
3429         int coeff;
3430
3431         if(level){
3432             if(level<0) coeff= qmul*level - qadd;
3433             else        coeff= qmul*level + qadd;
3434             run_tab[rle_index++]=run;
3435             run=0;
3436
3437             s->dsp.add_8x8basis(rem, basis[j], coeff);
3438         }else{
3439             run++;
3440         }
3441     }
3442 #ifdef REFINE_STATS
3443 if(last_non_zero>0){
3444 STOP_TIMER("init rem[]")
3445 }
3446 }
3447
3448 {START_TIMER
3449 #endif
         /* one pass per applied change; the loop ends when no change helps */
3450     for(;;){
             /* baseline: score of leaving the block unchanged */
3451         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3452         int best_coeff=0;
3453         int best_change=0;
3454         int run2, best_unquant_change=0, analyze_gradient;
3455 #ifdef REFINE_STATS
3456 {START_TIMER
3457 #endif
3458         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3459
             /* d1[] = forward DCT of the weighted remainder; used below to skip
              * turning a zero coefficient nonzero when the sign does not fit */
3460         if(analyze_gradient){
3461 #ifdef REFINE_STATS
3462 {START_TIMER
3463 #endif
3464             for(i=0; i<64; i++){
3465                 int w= weight[i];
3466
3467                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3468             }
3469 #ifdef REFINE_STATS
3470 STOP_TIMER("rem*w*w")}
3471 {START_TIMER
3472 #endif
3473             s->dsp.fdct(d1);
3474 #ifdef REFINE_STATS
3475 STOP_TIMER("dct")}
3476 #endif
3477         }
3478
             /* intra only: try changing the DC level by -1 and +1 */
3479         if(start_i){
3480             const int level= block[0];
3481             int change, old_coeff;
3482
3483             assert(s->mb_intra);
3484
3485             old_coeff= q*level;
3486
3487             for(change=-1; change<=1; change+=2){
3488                 int new_level= level + change;
3489                 int score, new_coeff;
3490
3491                 new_coeff= q*new_level;
3492                 if(new_coeff >= 2048 || new_coeff < 0)
3493                     continue;
3494
3495                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3496                 if(score<best_score){
3497                     best_score= score;
3498                     best_coeff= 0;
3499                     best_change= change;
3500                     best_unquant_change= new_coeff - old_coeff;
3501                 }
3502             }
3503         }
3504
3505         run=0;
3506         rle_index=0;
3507         run2= run_tab[rle_index++];
3508         prev_level=0;
3509         prev_run=0;
3510
             /* try -1 and +1 on every coefficient in scan order; run is the zero-run
              * before position i, run2 the number of zeros up to the next nonzero */
3511         for(i=start_i; i<64; i++){
3512             int j= perm_scantable[i];
3513             const int level= block[j];
3514             int change, old_coeff;
3515
3516             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3517                 break;
3518
3519             if(level){
3520                 if(level<0) old_coeff= qmul*level - qadd;
3521                 else        old_coeff= qmul*level + qadd;
3522                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3523             }else{
3524                 old_coeff=0;
3525                 run2--;
3526                 assert(run2>=0 || i >= last_non_zero );
3527             }
3528
3529             for(change=-1; change<=1; change+=2){
3530                 int new_level= level + change;
3531                 int score, new_coeff, unquant_change;
3532
                     /* score first accumulates the VLC length difference, then is
                      * scaled by lambda and added to the distortion term */
3533                 score=0;
3534                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3535                    continue;
3536
3537                 if(new_level){
3538                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3539                     else            new_coeff= qmul*new_level + qadd;
3540                     if(new_coeff >= 2048 || new_coeff <= -2048)
3541                         continue;
3542                     //FIXME check for overflow
3543
3544                     if(level){
                             /* nonzero -> nonzero: same run, different level */
3545                         if(level < 63 && level > -63){
3546                             if(i < last_non_zero)
3547                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3548                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3549                             else
3550                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3551                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3552                         }
3553                     }else{
                             /* zero -> +-1: the run containing position i is split */
3554                         assert(FFABS(new_level)==1);
3555
3556                         if(analyze_gradient){
3557                             int g= d1[ scantable[i] ];
3558                             if(g && (g^new_level) >= 0)
3559                                 continue;
3560                         }
3561
3562                         if(i < last_non_zero){
3563                             int next_i= i + run2 + 1;
3564                             int next_level= block[ perm_scantable[next_i] ] + 64;
3565
3566                             if(next_level&(~127))
3567                                 next_level= 0;
3568
3569                             if(next_i < last_non_zero)
3570                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3571                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3572                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3573                             else
3574                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3575                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3576                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3577                         }else{
                                 /* new coefficient becomes the last one; the previous
                                  * last coefficient switches to the non-last table */
3578                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3579                             if(prev_level){
3580                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3581                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3582                             }
3583                         }
3584                     }
3585                 }else{
                         /* +-1 -> zero: the runs on both sides of position i merge */
3586                     new_coeff=0;
3587                     assert(FFABS(level)==1);
3588
3589                     if(i < last_non_zero){
3590                         int next_i= i + run2 + 1;
3591                         int next_level= block[ perm_scantable[next_i] ] + 64;
3592
3593                         if(next_level&(~127))
3594                             next_level= 0;
3595
3596                         if(next_i < last_non_zero)
3597                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3598                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3599                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3600                         else
3601                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3602                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3603                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3604                     }else{
3605                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3606                         if(prev_level){
3607                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3608                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3609                         }
3610                     }
3611                 }
3612
3613                 score *= lambda;
3614
3615                 unquant_change= new_coeff - old_coeff;
3616                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3617
3618                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3619                 if(score<best_score){
3620                     best_score= score;
3621                     best_coeff= i;
3622                     best_change= change;
3623                     best_unquant_change= unquant_change;
3624                 }
3625             }
3626             if(level){
3627                 prev_level= level + 64;
3628                 if(prev_level&(~127))
3629                     prev_level= 0;
3630                 prev_run= run;
3631                 run=0;
3632             }else{
3633                 run++;
3634             }
3635         }
3636 #ifdef REFINE_STATS
3637 STOP_TIMER("iterative step")}
3638 #endif
3639
             /* apply the winning change, then refresh last_non_zero, run_tab[] and rem[] */
3640         if(best_change){
3641             int j= perm_scantable[ best_coeff ];
3642
3643             block[j] += best_change;
3644
3645             if(best_coeff > last_non_zero){
3646                 last_non_zero= best_coeff;
3647                 assert(block[j]);
3648 #ifdef REFINE_STATS
3649 after_last++;
3650 #endif
3651             }else{
3652 #ifdef REFINE_STATS
3653 if(block[j]){
3654     if(block[j] - best_change){
3655         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3656             raise++;
3657         }else{
3658             lower++;
3659         }
3660     }else{
3661         from_zero++;
3662     }
3663 }else{
3664     to_zero++;
3665 }
3666 #endif
                     /* the change may have zeroed the last coefficient */
3667                 for(; last_non_zero>=start_i; last_non_zero--){
3668                     if(block[perm_scantable[last_non_zero]])
3669                         break;
3670                 }
3671             }
3672 #ifdef REFINE_STATS
3673 count++;
3674 if(256*256*256*64 % count == 0){
3675     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3676 }
3677 #endif
3678             run=0;
3679             rle_index=0;
3680             for(i=start_i; i<=last_non_zero; i++){
3681                 int j= perm_scantable[i];
3682                 const int level= block[j];
3683
3684                  if(level){
3685                      run_tab[rle_index++]=run;
3686                      run=0;
3687                  }else{
3688                      run++;
3689                  }
3690             }
3691
3692             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3693         }else{
3694             break;
3695         }
3696     }
3697 #ifdef REFINE_STATS
3698 if(last_non_zero>0){
3699 STOP_TIMER("iterative search")
3700 }
3701 }
3702 #endif
3703
3704     return last_non_zero;
3705 }
3706
3707 int dct_quantize_c(MpegEncContext *s,
3708                         DCTELEM *block, int n,
3709                         int qscale, int *overflow)
3710 {
3711     int i, j, level, last_non_zero, q, start_i;
3712     const int *qmat;
3713     const uint8_t *scantable= s->intra_scantable.scantable;
3714     int bias;
3715     int max=0;
3716     unsigned int threshold1, threshold2;
3717
3718     s->dsp.fdct (block);
3719
3720     if(s->dct_error_sum)
3721         s->denoise_dct(s, block);
3722
3723     if (s->mb_intra) {
3724         if (!s->h263_aic) {
3725             if (n < 4)
3726                 q = s->y_dc_scale;
3727             else
3728                 q = s->c_dc_scale;
3729             q = q << 3;
3730         } else
3731             /* For AIC we skip quant/dequant of INTRADC */
3732             q = 1 << 3;
3733
3734         /* note: block[0] is assumed to be positive */
3735         block[0] = (block[0] + (q >> 1)) / q;
3736         start_i = 1;
3737         last_non_zero = 0;
3738         qmat = s->q_intra_matrix[qscale];
3739         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3740     } else {
3741         start_i = 0;
3742         last_non_zero = -1;
3743         qmat = s->q_inter_matrix[qscale];
3744         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3745     }
3746     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3747     threshold2= (threshold1<<1);
3748     for(i=63;i>=start_i;i--) {
3749         j = scantable[i];
3750         level = block[j] * qmat[j];
3751
3752         if(((unsigned)(level+threshold1))>threshold2){
3753             last_non_zero = i;
3754             break;
3755         }else{
3756             block[j]=0;
3757         }
3758     }
3759     for(i=start_i; i<=last_non_zero; i++) {
3760         j = scantable[i];
3761         level = block[j] * qmat[j];
3762
3763 //        if(   bias+level >= (1<<QMAT_SHIFT)
3764 //           || bias-level >= (1<<QMAT_SHIFT)){
3765         if(((unsigned)(level+threshold1))>threshold2){
3766             if(level>0){
3767                 level= (bias + level)>>QMAT_SHIFT;
3768                 block[j]= level;
3769             }else{
3770                 level= (bias - level)>>QMAT_SHIFT;
3771                 block[j]= -level;
3772             }
3773             max |=level;
3774         }else{
3775             block[j]=0;
3776         }
3777     }
3778     *overflow= s->max_qcoeff < max; //overflow might have happened
3779
3780     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
3781     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
3782         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
3783
3784     return last_non_zero;
3785 }
3786
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the video encoder options below. The replacement list is
 * parenthesized so the macro stays a single operand when combined with
 * higher-precedence operators such as & . */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
3789 static const AVOption h263_options[] = {
3790     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3791     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE},
3792     { NULL },
3793 };
3794
3795 static const AVClass h263_class = {
3796     .class_name = "H.263 encoder",
3797     .item_name  = av_default_item_name,
3798     .option     = h263_options,
3799     .version    = LIBAVUTIL_VERSION_INT,
3800 };
3801
3802 AVCodec ff_h263_encoder = {
3803     .name           = "h263",
3804     .type           = AVMEDIA_TYPE_VIDEO,
3805     .id             = CODEC_ID_H263,
3806     .priv_data_size = sizeof(MpegEncContext),
3807     .init           = MPV_encode_init,
3808     .encode         = MPV_encode_picture,
3809     .close          = MPV_encode_end,
3810     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3811     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
3812     .priv_class     = &h263_class,
3813 };
3814
3815 static const AVOption h263p_options[] = {
3816     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3817     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3818     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3819     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE},
3820     { NULL },
3821 };
3822 static const AVClass h263p_class = {
3823     .class_name = "H.263p encoder",
3824     .item_name  = av_default_item_name,
3825     .option     = h263p_options,
3826     .version    = LIBAVUTIL_VERSION_INT,
3827 };
3828
3829 AVCodec ff_h263p_encoder = {
3830     .name           = "h263p",
3831     .type           = AVMEDIA_TYPE_VIDEO,
3832     .id             = CODEC_ID_H263P,
3833     .priv_data_size = sizeof(MpegEncContext),
3834     .init           = MPV_encode_init,
3835     .encode         = MPV_encode_picture,
3836     .close          = MPV_encode_end,
3837     .capabilities = CODEC_CAP_SLICE_THREADS,
3838     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3839     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
3840     .priv_class     = &h263p_class,
3841 };
3842
3843 AVCodec ff_msmpeg4v2_encoder = {
3844     .name           = "msmpeg4v2",
3845     .type           = AVMEDIA_TYPE_VIDEO,
3846     .id             = CODEC_ID_MSMPEG4V2,
3847     .priv_data_size = sizeof(MpegEncContext),
3848     .init           = MPV_encode_init,
3849     .encode         = MPV_encode_picture,
3850     .close          = MPV_encode_end,
3851     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3852     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
3853 };
3854
3855 AVCodec ff_msmpeg4v3_encoder = {
3856     .name           = "msmpeg4",
3857     .type           = AVMEDIA_TYPE_VIDEO,
3858     .id             = CODEC_ID_MSMPEG4V3,
3859     .priv_data_size = sizeof(MpegEncContext),
3860     .init           = MPV_encode_init,
3861     .encode         = MPV_encode_picture,
3862     .close          = MPV_encode_end,
3863     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3864     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
3865 };
3866
3867 AVCodec ff_wmv1_encoder = {
3868     .name           = "wmv1",
3869     .type           = AVMEDIA_TYPE_VIDEO,
3870     .id             = CODEC_ID_WMV1,
3871     .priv_data_size = sizeof(MpegEncContext),
3872     .init           = MPV_encode_init,
3873     .encode         = MPV_encode_picture,
3874     .close          = MPV_encode_end,
3875     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3876     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
3877 };