OSDN Git Service

optimize DC prediction decoding.
[qtheora/main.git] / Lib / QTheoraEx / FrameReconstructor.c
1 /* FrameReconstructor.c */
2 /* 2009/07/02           */
3
4 #include "StdAfx.h"
5
6 #include "FrameReconstructor.h"
7
8 /* */
9
10 static void Block_Extract8x8(
11         const Plane_t* plane,
12         INT32          x,
13         INT32          y,
14         UINT8*         block)
15 {
16         INT32 i, j;
17
18         for (i = 0; i < 8; i++) {
19                 for (j = 0; j < 8; j++) {
20                         INT32 xx = x + j;
21                         INT32 yy = y + i;
22
23                         if (xx < 0) {
24                                 xx = 0;
25                         } else if (xx >= plane->CX) {
26                                 xx = plane->CX - 1;
27                         }
28
29                         if (yy < 0) {
30                                 yy = 0;
31                         } else if (yy >= plane->CY) {
32                                 yy = plane->CY - 1;
33                         }
34
35                         block[i * 8 + j] = plane->Plane[yy * plane->Pitch + xx];
36                 }
37         }
38 }
39
40 /* */
41
42 static void Block_CopyIntra8x8(
43         Plane_t*     p,
44         INT32        x,
45         INT32        y,
46         const INT16* block)
47 {
48         const INT16* s = block;
49
50         UINT8* d = p->Plane + y * p->Pitch + x;
51         UINT8* e = d + 8 * p->Pitch;
52
53         UINT8 v0[2] = { 0,   0 };
54         UINT8 v1[2] = { 0, 255 };
55
56         INT32 i;
57
58         for (; d < e; d += p->Pitch) {
59                 for (i = 0; i < 8; i++, s++) {
60                         INT32 pix = 128 + *s;
61                         v0[0] = pix;
62                         v1[0] = v0[pix <   0];
63                         d [i] = v1[pix > 255];
64                 }
65         }
66 }
67
68 static void Block_ReviseInter8x8(
69         Plane_t*     p,
70         INT32        x,
71         INT32        y,
72         const INT16* block)
73 {
74         const INT16* s = block;
75
76         UINT8* d = p->Plane + y * p->Pitch + x;
77         UINT8* e = d + 8 * p->Pitch;
78
79         UINT8 v0[2] = { 0,   0 };
80         UINT8 v1[2] = { 0, 255 };
81
82         INT32 i;
83
84         for (; d < e; d += p->Pitch) {
85                 for (i = 0; i < 8; i++, s++) {
86                         INT32 pix = d[i] + *s;
87                         v0[0] = pix;
88                         v1[0] = v0[pix <   0];
89                         d [i] = v1[pix > 255];
90                 }
91         }
92 }
93
94 /* */
95
96 static void Block_CopyPlane16x16(
97         Plane_t* p,
98         INT32    x,
99         INT32    y,
100         Plane_t* s)
101 {
102         UINT8* d0 = p->Plane + y * p->Pitch + x;
103         UINT8* e0 = d0 + 16 * p->Pitch;
104
105         const UINT8* s0 = s->Plane + y * s->Pitch + x;
106
107         for (; d0 < e0; d0 += p->Pitch, s0 += s->Pitch) {
108                 d0[ 0] = s0[ 0];
109                 d0[ 1] = s0[ 1];
110                 d0[ 2] = s0[ 2];
111                 d0[ 3] = s0[ 3];
112
113                 d0[ 4] = s0[ 4];
114                 d0[ 5] = s0[ 5];
115                 d0[ 6] = s0[ 6];
116                 d0[ 7] = s0[ 7];
117
118                 d0[ 8] = s0[ 8];
119                 d0[ 9] = s0[ 9];
120                 d0[10] = s0[10];
121                 d0[11] = s0[11];
122
123                 d0[12] = s0[12];
124                 d0[13] = s0[13];
125                 d0[14] = s0[14];
126                 d0[15] = s0[15];
127         }
128 }
129
130 static void Block_CopyPlane8x8(
131         Plane_t* p,
132         INT32    x,
133         INT32    y,
134         Plane_t* s)
135 {
136         UINT8* d0 = p->Plane + y * p->Pitch + x;
137         UINT8* e0 = d0 + 8 * p->Pitch;
138
139         const UINT8* s0 = s->Plane + y * s->Pitch + x;
140
141         for (; d0 < e0; d0 += p->Pitch, s0 += s->Pitch) {
142                 d0[0] = s0[0];
143                 d0[1] = s0[1];
144                 d0[2] = s0[2];
145                 d0[3] = s0[3];
146
147                 d0[4] = s0[4];
148                 d0[5] = s0[5];
149                 d0[6] = s0[6];
150                 d0[7] = s0[7];
151         }
152 }
153
154 /* */
155
156 static void MotionComp_Compensate8x8(
157         UINT8*         p,
158         INT32          pitch,
159         const Plane_t* r,
160         INT32          x,
161         INT32          y);
162
163 static void MotionComp_Compensate8x8H(
164         UINT8*         p,
165         INT32          pitch,
166         const Plane_t* r,
167         INT32          x0,
168         INT32          y0,
169         INT32          x1,
170         INT32          y1);
171
172 /* */
173
174 static void MotionComp_Compensate16x16(
175         UINT8*         p,
176         INT32          pitch,
177         const Plane_t* r,
178         INT32          x,
179         INT32          y)
180 {
181         if (x >= 0 && x + 16 < r->CX &&
182                 y >= 0 && y + 16 < r->CY) {
183                 const UINT8* s = r->Plane + y * r->Pitch + x;
184
185                 UINT8* d = p;
186                 UINT8* e = p + 16 * pitch;
187                 while (d < e) {
188                         d[ 0] = s[ 0];
189                         d[ 1] = s[ 1];
190                         d[ 2] = s[ 2];
191                         d[ 3] = s[ 3];
192
193                         d[ 4] = s[ 4];
194                         d[ 5] = s[ 5];
195                         d[ 6] = s[ 6];
196                         d[ 7] = s[ 7];
197
198                         d[ 8] = s[ 8];
199                         d[ 9] = s[ 9];
200                         d[10] = s[10];
201                         d[11] = s[11];
202
203                         d[12] = s[12];
204                         d[13] = s[13];
205                         d[14] = s[14];
206                         d[15] = s[15];
207
208                         s += r->Pitch;
209                         d += pitch;
210                 }
211
212         } else {
213                 MotionComp_Compensate8x8(p,                 pitch, r, x,     y    );
214                 MotionComp_Compensate8x8(p + 8,             pitch, r, x + 8, y    );
215                 MotionComp_Compensate8x8(p     + 8 * pitch, pitch, r, x,     y + 8);
216                 MotionComp_Compensate8x8(p + 8 + 8 * pitch, pitch, r, x + 8, y + 8);
217         }
218 }
219
220 static void MotionComp_Compensate8x8(
221         UINT8*         p,
222         INT32          pitch,
223         const Plane_t* r,
224         INT32          x,
225         INT32          y)
226 {
227         UINT8 b[64];
228
229         const UINT8* s0 = r->Plane + y * r->Pitch + x;
230         INT32        p0 = r->Pitch;
231
232         UINT8* d = p;
233         UINT8* e = p + 8 * pitch;
234
235         if (x < 0 || x + 8 >= r->CX ||
236                 y < 0 || y + 8 >= r->CY) {
237                 s0 = b;
238                 p0 = 8;
239                 Block_Extract8x8(r, x, y, b);
240         }
241
242         while (d < e) {
243                 d[ 0] = s0[ 0];
244                 d[ 1] = s0[ 1];
245                 d[ 2] = s0[ 2];
246                 d[ 3] = s0[ 3];
247
248                 d[ 4] = s0[ 4];
249                 d[ 5] = s0[ 5];
250                 d[ 6] = s0[ 6];
251                 d[ 7] = s0[ 7];
252
253                 s0 += p0;
254                 d  += pitch;
255         }
256 }
257
258 static void MotionComp_Compensate16x16H(
259         UINT8*         p,
260         INT32          pitch,
261         const Plane_t* r,
262         INT32          x0,
263         INT32          y0,
264         INT32          x1,
265         INT32          y1)
266 {
267         if (x0 >= 0 && x0 + 16 < r->CX &&
268                 y0 >= 0 && y0 + 16 < r->CY &&
269                 x1 >= 0 && x1 + 16 < r->CX &&
270                 y1 >= 0 && y1 + 16 < r->CY) {
271                 const UINT8* s0 = r->Plane + y0 * r->Pitch + x0;
272                 const UINT8* s1 = r->Plane + y1 * r->Pitch + x1;
273
274                 UINT8* d = p;
275                 UINT8* e = p + 16 * pitch;
276                 while (d < e) {
277                         d[ 0] = (s0[ 0] + s1[ 0]) >> 1;
278                         d[ 1] = (s0[ 1] + s1[ 1]) >> 1;
279                         d[ 2] = (s0[ 2] + s1[ 2]) >> 1;
280                         d[ 3] = (s0[ 3] + s1[ 3]) >> 1;
281
282                         d[ 4] = (s0[ 4] + s1[ 4]) >> 1;
283                         d[ 5] = (s0[ 5] + s1[ 5]) >> 1;
284                         d[ 6] = (s0[ 6] + s1[ 6]) >> 1;
285                         d[ 7] = (s0[ 7] + s1[ 7]) >> 1;
286
287                         d[ 8] = (s0[ 8] + s1[ 8]) >> 1;
288                         d[ 9] = (s0[ 9] + s1[ 9]) >> 1;
289                         d[10] = (s0[10] + s1[10]) >> 1;
290                         d[11] = (s0[11] + s1[11]) >> 1;
291
292                         d[12] = (s0[12] + s1[12]) >> 1;
293                         d[13] = (s0[13] + s1[13]) >> 1;
294                         d[14] = (s0[14] + s1[14]) >> 1;
295                         d[15] = (s0[15] + s1[15]) >> 1;
296
297                         s0 += r->Pitch;
298                         s1 += r->Pitch;
299                         d  += pitch;
300                 }
301
302         } else {
303                 MotionComp_Compensate8x8H(p,                 pitch, r, x0,     y0    , x1,     y1    );
304                 MotionComp_Compensate8x8H(p + 8,             pitch, r, x0 + 8, y0    , x1 + 8, y1    );
305                 MotionComp_Compensate8x8H(p     + 8 * pitch, pitch, r, x0,     y0 + 8, x1,     y1 + 8);
306                 MotionComp_Compensate8x8H(p + 8 + 8 * pitch, pitch, r, x0 + 8, y0 + 8, x1 + 8, y1 + 8);
307         }
308 }
309
310 static void MotionComp_Compensate8x8H(
311         UINT8*         p,
312         INT32          pitch,
313         const Plane_t* r,
314         INT32          x0,
315         INT32          y0,
316         INT32          x1,
317         INT32          y1)
318 {
319         UINT8 b0[64], b1[64];
320
321         const UINT8* s0 = r->Plane + y0 * r->Pitch + x0;
322         INT32        p0 = r->Pitch;
323
324         const UINT8* s1 = r->Plane + y1 * r->Pitch + x1;
325         INT32        p1 = r->Pitch;
326
327         UINT8* d = p;
328         UINT8* e = p + 8 * pitch;
329
330         if (x0 < 0 || x0 + 8 >= r->CX ||
331                 y0 < 0 || y0 + 8 >= r->CY ||
332                 x1 < 0 || x1 + 8 >= r->CX ||
333                 y1 < 0 || y1 + 8 >= r->CY) {
334                 s0 = b0;
335                 p0 = 8;
336
337                 s1 = b1;
338                 p1 = 8;
339
340                 Block_Extract8x8(r, x0, y0, b0);
341                 Block_Extract8x8(r, x1, y1, b1);
342         }
343
344         while (d < e) {
345                 d[0] = (s0[0] + s1[0]) >> 1;
346                 d[1] = (s0[1] + s1[1]) >> 1;
347                 d[2] = (s0[2] + s1[2]) >> 1;
348                 d[3] = (s0[3] + s1[3]) >> 1;
349
350                 d[4] = (s0[4] + s1[4]) >> 1;
351                 d[5] = (s0[5] + s1[5]) >> 1;
352                 d[6] = (s0[6] + s1[6]) >> 1;
353                 d[7] = (s0[7] + s1[7]) >> 1;
354
355                 s0 += p0;
356                 s1 += p1;
357                 d  += pitch;
358         }
359 }
360
361 /* */
362
363 static void MotionComp_Block16x16(
364         Plane_t*              p,
365         INT32                 x,
366         INT32                 y,
367         const Plane_t*        r,
368         const MotionVector_t* mv)
369 {
370         INT32 dx = ((mv->X & 1) != 0);
371         INT32 dy = ((mv->Y & 1) != 0);
372
373         INT32 vx[2] = { mv->X >> 1 };
374         INT32 vy[2] = { mv->Y >> 1 };
375
376         UINT8* d = p->Plane + y * p->Pitch + x;
377
378         if (dx == 0 && dy == 0) {
379                 MotionComp_Compensate16x16(
380                         d,
381                         p->Pitch,
382                         r,
383                         x + vx[0],
384                         y + vy[0]);
385
386         } else {
387                 vx[1] = vx[0];
388                 vy[1] = vy[0];
389
390                 vx[mv->X >= 0] += dx;
391                 vy[mv->Y >= 0] += dy;
392
393                 MotionComp_Compensate16x16H(
394                         d,
395                         p->Pitch,
396                         r,
397                         x + vx[0],
398                         y + vy[0],
399                         x + vx[1],
400                         y + vy[1]);
401         }
402 }
403
404 static void MotionComp_Block8x8Y(
405         Plane_t*              p,
406         INT32                 x,
407         INT32                 y,
408         const Plane_t*        r,
409         const MotionVector_t* mv)
410 {
411         INT32 dx = ((mv->X & 1) != 0);
412         INT32 dy = ((mv->Y & 1) != 0);
413
414         INT32 vx[2] = { mv->X >> 1 };
415         INT32 vy[2] = { mv->Y >> 1 };
416
417         UINT8* d = p->Plane + y * p->Pitch + x;
418
419         if (dx == 0 && dy == 0) {
420                 MotionComp_Compensate8x8(
421                         d,
422                         p->Pitch,
423                         r,
424                         x + vx[0],
425                         y + vy[0]);
426
427         } else {
428                 vx[1] = vx[0];
429                 vy[1] = vy[0];
430
431                 vx[mv->X >= 0] += dx;
432                 vy[mv->Y >= 0] += dy;
433
434                 MotionComp_Compensate8x8H(
435                         d,
436                         p->Pitch,
437                         r,
438                         x + vx[0],
439                         y + vy[0],
440                         x + vx[1],
441                         y + vy[1]);
442         }
443 }
444
445 static void MotionComp_Block8x8C(
446         Plane_t*              p,
447         INT32                 x,
448         INT32                 y,
449         const Plane_t*        r,
450         const MotionVector_t* mv0)
451 {
452         MotionVector_t mv = {
453                 (mv0->X >> 1) | (mv0->X & 1),
454                 (mv0->Y >> 1) | (mv0->Y & 1)
455         };
456
457         INT32 dx = ((mv.X & 1) != 0);
458         INT32 dy = ((mv.Y & 1) != 0);
459
460         INT32 vx[2] = { mv.X >> 1 };
461         INT32 vy[2] = { mv.Y >> 1 };
462
463         UINT8* d = p->Plane + y * p->Pitch + x;
464
465         if (dx == 0 && dy == 0) {
466                 MotionComp_Compensate8x8(
467                         d,
468                         p->Pitch,
469                         r,
470                         x + vx[0],
471                         y + vy[0]);
472
473         } else {
474                 vx[1] = vx[0];
475                 vy[1] = vy[0];
476
477                 vx[mv.X >= 0] += dx;
478                 vy[mv.Y >= 0] += dy;
479
480                 MotionComp_Compensate8x8H(
481                         d,
482                         p->Pitch,
483                         r,
484                         x + vx[0],
485                         y + vy[0],
486                         x + vx[1],
487                         y + vy[1]);
488         }
489 }
490
491 /* */
492
493 static const INT32 COS[8] = {
494         65536,
495         64277,
496         60547,
497         54491,
498         46341,
499         36410,
500         25080,
501         12785
502 };
503
504 #define MUL(T,X) ((COS[T] * (X)) >> 16)
505
506 static void IDCT_8(
507         const INT16* x,
508         INT16*       y)
509 {
510         INT32 t[8];
511         INT32 t_3, t_2, t_5;
512         INT16 s[2];
513
514         /* Stage.1 */
515
516         s[0] = x[0] + x[4];
517         s[1] = x[0] - x[4];
518
519         t[0] = MUL(4, s[0]);
520         t[1] = MUL(4, s[1]);
521
522         t[2] = MUL(6, x[2]) - MUL(2, x[6]);
523         t[3] = MUL(2, x[2]) + MUL(6, x[6]);
524
525         t[4] = MUL(7, x[1]) - MUL(1, x[7]);
526         t[5] = MUL(3, x[5]) - MUL(5, x[3]);
527         t[6] = MUL(5, x[5]) + MUL(3, x[3]);
528         t[7] = MUL(1, x[1]) + MUL(7, x[7]);
529
530         /* Stage.2 */
531
532         s[0] = t[4] - t[5];
533         s[1] = t[7] - t[6];
534
535         t[4] += t[5];
536         t[5]  = MUL(4, s[0]);
537
538         t[7] += t[6];
539         t[6]  = MUL(4, s[1]);
540
541         /* Stage.3 */
542
543         t_3 = t[0] - t[3];
544         t_2 = t[1] - t[2];
545         t_5 = t[6] - t[5];
546
547         t[0] += t[3];
548         t[1] += t[2];
549         t[6] += t[5];
550
551         /* Stage.4 */
552
553         y[0*8] = t[0] + t[7];
554         y[1*8] = t[1] + t[6];
555         y[2*8] = t_2  + t_5 ;
556         y[3*8] = t_3  + t[4];
557         y[4*8] = t_3  - t[4];
558         y[5*8] = t_2  - t_5 ;
559         y[6*8] = t[1] - t[6];
560         y[7*8] = t[0] - t[7];
561 }
562
563 /* */
564
565 static void IDCT_8x8(
566         const INT16* x,
567         INT16*       y)
568 {
569         const INT16* xx;
570         INT16*       yy;
571         INT16*       end;
572         INT16        w[64];
573
574         for (xx = x, yy = w, end = yy + 8; yy < end; xx += 4*8, yy += 4) {
575                 IDCT_8(xx + 0*8, yy + 0);
576                 IDCT_8(xx + 1*8, yy + 1);
577                 IDCT_8(xx + 2*8, yy + 2);
578                 IDCT_8(xx + 3*8, yy + 3);
579         }
580
581         for (xx = w, yy = y, end = yy + 8; yy < end; xx += 4*8, yy += 4) {
582                 IDCT_8(xx + 0*8, yy + 0);
583                 IDCT_8(xx + 1*8, yy + 1);
584                 IDCT_8(xx + 2*8, yy + 2);
585                 IDCT_8(xx + 3*8, yy + 3);
586         }
587
588         for (yy = y, end = yy + 64; yy < end; yy += 4) {
589                 yy[0] = (yy[0] + 8) >> 4;
590                 yy[1] = (yy[1] + 8) >> 4;
591                 yy[2] = (yy[2] + 8) >> 4;
592                 yy[3] = (yy[3] + 8) >> 4;
593         }
594 }
595
596 /* */
597
598 struct DecodeCoefficientsContext {
599
600         INT32 EOB_Run[64];
601
602         INT8*  Run  [64];
603         INT16* Coeff[64];
604
605 }; /* DecodeCoefficientsContext */
606
607 typedef struct DecodeCoefficientsContext DecodeCoefficientsContext_t;
608
609 static INT32 DecodeCoefficients(
610         FrameDecoder_t*              t,
611         DecodeCoefficientsContext_t* ctx,
612         INT16*                       block)
613 {
614         INT16* b = block;
615         INT16* e = b + 64;
616
617         INT32 i = 0;
618
619         while (b < e) {
620                 if (ctx->EOB_Run[i] > 0) {
621                         ctx->EOB_Run[i] -= 1;
622                         break;
623
624                 } else {
625                         INT32 run   = *((ctx->Run  [i])++);
626                         INT16 coeff = *((ctx->Coeff[i])++);
627
628                         if (run < 0) {
629                                 ctx->EOB_Run[i] = coeff;
630
631                         } else {
632                                 INT16* p = b + run;
633                                 if (p >= e) {
634                                         break;
635                                 }
636
637                                 while (b < p) {
638                                         *(b++) = 0;
639                                 }
640
641                                 *(b++) = coeff;
642
643                                 i = b - block;
644                         }
645                 }
646         }
647
648         while (b < e) {
649                 *(b++) = 0;
650         }
651
652         return i;
653 }
654
655 /* */
656
657 static const UINT8 ZIGZAG[64] = {
658          0, 1, 5, 6,14,15,27,28,
659          2, 4, 7,13,16,26,29,42,
660          3, 8,12,17,25,30,41,43,
661          9,11,18,24,31,40,44,53,
662         10,19,23,32,39,45,52,54,
663         20,22,33,38,46,51,55,60,
664         21,34,37,47,50,56,59,61,
665         35,36,48,49,57,58,62,63
666 };
667
668 static void Dequantize_DoDequantize(
669         const DequantizeMatrix_t* m,
670         INT32                     intra,
671         INT32                     plane,
672         INT16*                    block,
673         const INT16*              fragment)
674 {
675         const INT16* mat = m->Matrix[intra][plane];
676
677         INT32 i;
678         for (i = 0; i < 64; i++) {
679                 block[i] = fragment[ZIGZAG[i]] * mat[i];
680         }
681 }
682
683 /* */
684
685 static void Reconstruct_IntraBlock(
686         FrameDecoder_t*              t,
687         Plane_t*                     p,
688         INT32                        x,
689         INT32                        y,
690         INT16                        dc,
691         INT32                        qi,
692         INT32                        plane,
693         Plane_t*                     r,
694         DecodeCoefficientsContext_t* ctx)
695 {
696         INT16 block[64];
697         INT16 coeff[64];
698
699         if (dc == NOT_CODED) {
700                 Block_CopyPlane8x8(p, x, y, r);
701                 return;
702         }
703
704         DecodeCoefficients(t, ctx, block);
705
706         block[0] = dc;
707
708         Dequantize_DoDequantize(
709                 &(t->Dequantize[qi]),
710                 0,
711                 plane,
712                 coeff,
713                 block);
714
715         IDCT_8x8(coeff, coeff);
716
717         Block_CopyIntra8x8(p, x, y, coeff);
718 }
719
720 /* */
721
722 static void Reconstruct_InterBlock(
723         FrameDecoder_t*              t,
724         Plane_t*                     p,
725         INT32                        x,
726         INT32                        y,
727         INT16                        dc,
728         INT32                        qi,
729         INT32                        plane,
730         Plane_t*                     r,
731         DecodeCoefficientsContext_t* ctx)
732 {
733         INT16 block[64];
734         INT16 coeff[64];
735
736         if (dc == NOT_CODED) {
737                 if (r != NULL) {
738                         Block_CopyPlane8x8(p, x, y, r);
739                 }
740                 return;
741         }
742
743         DecodeCoefficients(t, ctx, block);
744
745         block[0] = dc;
746
747         Dequantize_DoDequantize(
748                 &(t->Dequantize[qi]),
749                 1,
750                 plane,
751                 coeff,
752                 block);
753
754         IDCT_8x8(coeff, coeff);
755
756         Block_ReviseInter8x8(p, x, y, coeff);
757 }
758
759 /* */
760
761 static const INT8 S_PX[16] = {
762         0*8, 1*8, 1*8, 0*8,
763         0*8, 0*8, 1*8, 1*8,
764         2*8, 2*8, 3*8, 3*8,
765         3*8, 2*8, 2*8, 3*8
766 };
767
768 static const INT8 S_PY[16] = {
769         0*8, 0*8, 1*8, 1*8,
770         2*8, 3*8, 3*8, 2*8,
771         2*8, 3*8, 3*8, 2*8,
772         1*8, 1*8, 0*8, 0*8
773 };
774
775 static const INT8 M_PX[4] = {
776         0*16, 0*16,
777         1*16, 1*16
778 };
779
780 static const INT8 M_PY[4] = {
781         0*16, 1*16,
782         1*16, 0*16
783 };
784
785 /* */
786
787 static void Reconstruct_YPlane(
788         FrameDecoder_t* t)
789 {
790         INT32 x, y;
791
792         INT32 sx = t->Index->SX[0] * 32;
793         INT32 sy = t->Index->SY[0] * 32;
794
795         INT32 mx = t->Index->MX * 16;
796         INT32 my = t->Index->MY * 16;
797
798         INT32 bx = t->Index->BX[0];
799
800         const UINT16* bi = t->Index->BIndex[0];
801
802         Plane_t* g = t->Frame[0];
803         Plane_t* p = t->Frame[1];
804         Plane_t* r = t->Frame[2];
805
806         const UINT8*          mm = t->MBMode;
807         const MotionVector_t* mv = t->MV;
808
809         const UINT8* qi = t->BQI;
810
811         DecodeCoefficientsContext_t ctx = { 0 };
812
813         INT32 i;
814         for (i = 0; i < 64; i++) {
815                 ctx.Run  [i] = t->BRun  [0][i];
816                 ctx.Coeff[i] = t->BCoeff[0][i];
817         }
818
819         for (y = 0; y < sy; y += 32) {
820                 for (x = 0; x < sx; x += 32) {
821                         INT32 i = 0;
822
823                         INT32 m;
824                         for (m = 0; m < 4; m++, i += 4) {
825                                 INT32 x0 = x + M_PX[m];
826                                 INT32 y0 = y + M_PY[m];
827                                 if (x0 < mx && y0 < my) {
828                                         switch (*mm) {
829                                         case 0: /* INTER_NOMV */
830                                                 Block_CopyPlane16x16(p, x0, y0, r);
831
832                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, NULL, &ctx);
833                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, NULL, &ctx);
834                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, NULL, &ctx);
835                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, NULL, &ctx);
836                                                 break;
837
838                                         case 1: /* INTRA */
839                                                 Reconstruct_IntraBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
840                                                 Reconstruct_IntraBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
841                                                 Reconstruct_IntraBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
842                                                 Reconstruct_IntraBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
843                                                 break;
844
845                                         case 2: /* INTER_MV */
846                                         case 3: /* INTER_MV_LAST */
847                                         case 4: /* INTER_MV_LAST2 */
848                                                 MotionComp_Block16x16(p, x0, y0, r, mv);
849
850                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
851                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
852                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
853                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
854                                                 break;
855
856                                         case 5: /* INTER_GOLDEN_NOMV */
857                                                 Block_CopyPlane16x16(p, x0, y0, g);
858
859                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
860                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
861                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
862                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
863                                                 break;
864
865                                         case 6: /* INTER_GOLDEN_MV */
866                                                 MotionComp_Block16x16(p, x0, y0, g, mv);
867
868                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
869                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
870                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
871                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
872                                                 break;
873
874                                         case 7: /* INTER_MV_FOUR */
875                                         {
876                                                 const MotionVector_t* v = mv;
877
878                                                 const INT16* dc = t->DC + (x0 >> 3) + (y0 >> 3) * bx;
879
880                                                 if (dc[0] != NOT_CODED) {
881                                                         MotionComp_Block8x8Y(p, x0 + 0, y0 + 0, r, v++);
882                                                 }
883
884                                                 if (dc[1] != NOT_CODED) {
885                                                         MotionComp_Block8x8Y(p, x0 + 8, y0 + 0, r, v++);
886                                                 }
887
888                                                 if (dc[0 + bx] != NOT_CODED) {
889                                                         MotionComp_Block8x8Y(p, x0 + 0, y0 + 8, r, v++);
890                                                 }
891
892                                                 if (dc[1 + bx] != NOT_CODED) {
893                                                         MotionComp_Block8x8Y(p, x0 + 8, y0 + 8, r, v++);
894                                                 }
895
896                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 0], y + S_PY[i + 0], t->DC[bi[0]], qi[0], 0, r, &ctx);
897                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 1], y + S_PY[i + 1], t->DC[bi[1]], qi[1], 0, r, &ctx);
898                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 2], y + S_PY[i + 2], t->DC[bi[2]], qi[2], 0, r, &ctx);
899                                                 Reconstruct_InterBlock(t, p, x + S_PX[i + 3], y + S_PY[i + 3], t->DC[bi[3]], qi[3], 0, r, &ctx);
900                                                 break;
901                                         }
902
903                                         } /* switch */
904
905                                         bi += 4;
906                                         mm += 1;
907                                         mv += 4;
908                                         qi += 4;
909                                 }
910                         }
911                 }
912         }
913 }
914
915 /* */
916
917 static void Reconstruct_CPlane(
918         FrameDecoder_t* t)
919 {
920         INT32 x, y;
921
922         INT32 sx = t->Index->SX[1] * 32;
923         INT32 sy = t->Index->SY[1] * 32;
924
925         INT32 mx = t->Index->MX * 8;
926         INT32 my = t->Index->MY * 8;
927
928         INT32 bx = t->Index->BX[1];
929
930         Plane_t* g = t->Frame[0];
931         Plane_t* p = t->Frame[1];
932         Plane_t* r = t->Frame[2];
933
934         const INT16* DC0 = t->DC + t->Index->BC[0];
935         const INT16* DC1 = DC0   + t->Index->BC[1];
936
937         const UINT8* m = t->BMode + t->Index->BC[0];
938
939         const UINT8* qi0 = t->BQI + t->Index->BC[0];
940         const UINT8* qi1 = qi0    + t->Index->BC[1];
941
942         DecodeCoefficientsContext_t ctx[2] = { 0 };
943
944         INT32 i;
945         for (i = 0; i < 64; i++) {
946                 ctx[0].Run  [i] = t->BRun  [1][i];
947                 ctx[0].Coeff[i] = t->BCoeff[1][i];
948
949                 ctx[1].Run  [i] = t->BRun  [2][i];
950                 ctx[1].Coeff[i] = t->BCoeff[2][i];
951         }
952
953         for (y = 0; y < sy; y += 32) {
954                 for (x = 0; x < sx; x += 32) {
955                         INT32 i;
956                         for (i = 0; i < 16; i++) {
957                                 INT32 xx = x + S_PX[i];
958                                 INT32 yy = y + S_PY[i];
959
960                                 if (xx < mx && yy < my) {
961                                         INT32 idx = (xx >> 3) + (yy >> 3) * bx;
962
963                                         switch (m[idx]) {
964                                         case 0: /* INTER_NOMV */
965                                                 Block_CopyPlane8x8(p + 1, xx, yy, r + 1);
966                                                 Block_CopyPlane8x8(p + 2, xx, yy, r + 2);
967
968                                                 Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, NULL, ctx + 0);
969                                                 Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, NULL, ctx + 1);
970                                                 break;
971
972                                         case 1: /* INTRA */
973                                                 Reconstruct_IntraBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
974                                                 Reconstruct_IntraBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
975                                                 break;
976
977                                         case 2: /* INTER_MV */
978                                         case 3: /* INTER_MV_LAST */
979                                         case 4: /* INTER_MV_LAST2 */
980                                                 MotionComp_Block8x8C(p + 1, xx, yy, r + 1, t->MVC + idx);
981                                                 MotionComp_Block8x8C(p + 2, xx, yy, r + 2, t->MVC + idx);
982
983                                                 Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
984                                                 Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
985                                                 break;
986
987                                         case 5: /* INTER_GOLDEN_NOMV */
988                                                 Block_CopyPlane8x8(p + 1, xx, yy, g + 1);
989                                                 Block_CopyPlane8x8(p + 2, xx, yy, g + 2);
990
991                                                 Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
992                                                 Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
993                                                 break;
994
995                                         case 6: /* INTER_GOLDEN_MV */
996                                                 MotionComp_Block8x8C(p + 1, xx, yy, g + 1, t->MVC + idx);
997                                                 MotionComp_Block8x8C(p + 2, xx, yy, g + 2, t->MVC + idx);
998
999                                                 Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
1000                                                 Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
1001                                                 break;
1002
1003                                         case 7: /* INTER_MV_FOUR */
1004                                                 MotionComp_Block8x8C(p + 1, xx, yy, r + 1, t->MVC + idx);
1005                                                 MotionComp_Block8x8C(p + 2, xx, yy, r + 2, t->MVC + idx);
1006
1007                                                 Reconstruct_InterBlock(t, p + 1, xx, yy, DC0[idx], *qi0, 1, r + 1, ctx + 0);
1008                                                 Reconstruct_InterBlock(t, p + 2, xx, yy, DC1[idx], *qi1, 2, r + 2, ctx + 1);
1009                                                 break;
1010
1011                                         } /* switch */
1012
1013                                         qi0++;
1014                                         qi1++;
1015                                 }
1016                         }
1017                 }
1018         }
1019 }
1020
1021 /* */
1022
1023 static void Filter_LoopFilterH(
1024         const LoopFilter_t* t,
1025         UINT8*              b,
1026         INT32               s)
1027 {
1028         const INT16* d = t->Delta + 127;
1029
1030         INT32 p0[2];
1031         INT32 p1[2];
1032
1033         INT32 q0[2];
1034         INT32 q1[2];
1035
1036         UINT8* p   = b;
1037         UINT8* end = p + s * 8;
1038
1039         p0[1] = 0;
1040         p1[1] = 0;
1041         q0[1] = 255;
1042         q1[1] = 255;
1043
1044         for (; p < end; p += s) {
1045                 INT32 x = (p[-2] - p[1]) + 3 * (p[0] - p[-1]);
1046                 INT32 v = d[(x + 4) >> 3];
1047
1048                 p0[0] = p[-1] + v;
1049                 p1[0] = p[ 0] - v;
1050
1051                 q0[0] = p0[(p0[0] < 0)];
1052                 q1[0] = p1[(p1[0] < 0)];
1053
1054                 p[-1] = q0[(q0[0] > 255)];
1055                 p[ 0] = q1[(q1[0] > 255)];
1056         }
1057 }
1058
1059 static void Filter_LoopFilterV(
1060         const LoopFilter_t* t,
1061         UINT8*              b,
1062         INT32               s)
1063 {
1064         const INT16* d = t->Delta + 127;
1065
1066         INT32 p0[2];
1067         INT32 p1[2];
1068
1069         INT32 q0[2];
1070         INT32 q1[2];
1071
1072         UINT8* p   = b;
1073         UINT8* end = p + 8;
1074
1075         p0[1] = 0;
1076         p1[1] = 0;
1077         q0[1] = 255;
1078         q1[1] = 255;
1079
1080         for (; p < end; p++) {
1081                 INT32 x = (p[-2 * s] - p[1 * s]) + 3 * (p[0] - p[-1 * s]);
1082                 INT32 v = d[(x + 4) >> 3];
1083
1084                 p0[0] = p[-s] + v;
1085                 p1[0] = p[ 0] - v;
1086
1087                 q0[0] = p0[(p0[0] < 0)];
1088                 q1[0] = p1[(p1[0] < 0)];
1089
1090                 p[-s] = q0[(q0[0] > 255)];
1091                 p[ 0] = q1[(q1[0] > 255)];
1092         }
1093 }
1094
1095 /* */
1096
1097 static void FrameLoopFilter(
1098         FrameDecoder_t* t)
1099 {
1100         INT32 i;
1101         INT32 x, y;
1102
1103         const INT16* b = t->DC;
1104
1105         Plane_t* plane = t->Frame[1];
1106
1107         for (i = 0; i < 3; i++, plane++) {
1108                 INT32 bx = t->Index->BX[i];
1109                 INT32 by = t->Index->BY[i];
1110
1111                 UINT8* r0 = plane->Plane;
1112
1113                 for (y = 0; y < by; y++, r0 += plane->Pitch * 8) {
1114                         UINT8* r = r0;
1115
1116                         for (x = 0; x < bx; x++, r += 8, b++) {
1117                                 if (*b != NOT_CODED) {
1118                                         if (x > 0) {
1119                                                 Filter_LoopFilterH(&(t->Filter), r, plane->Pitch);
1120                                         }
1121
1122                                         if (y > 0) {
1123                                                 Filter_LoopFilterV(&(t->Filter), r, plane->Pitch);
1124                                         }
1125
1126                                         if (x < bx - 1 && b[ 1] == NOT_CODED) {
1127                                                 Filter_LoopFilterH(&(t->Filter), r + 8, plane->Pitch);
1128                                         }
1129
1130                                         if (y < by - 1 && b[bx] == NOT_CODED) {
1131                                                 Filter_LoopFilterV(&(t->Filter), r + 8 * plane->Pitch, plane->Pitch);
1132                                         }
1133                                 }
1134                         }
1135                 }
1136         }
1137 }
1138
1139 /* */
1140
1141 void QT_ReconstructFrame(
1142         FrameDecoder_t* t)
1143 {
1144         Reconstruct_YPlane(t);
1145
1146         Reconstruct_CPlane(t);
1147
1148         if (t->Filter.Limit > 0) {
1149                 FrameLoopFilter(t);
1150         }
1151 }
1152
1153 /* */
1154