libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once;
        the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include "libavutil/avassert.h"
  79 #include <inttypes.h>
  80 #include <stdio.h>
  81 #include <stdlib.h>
  82 #include <string.h>
  83 //#undef HAVE_MMXEXT_INLINE
  84 //#define HAVE_AMD3DNOW_INLINE
  85 //#undef HAVE_MMX_INLINE
  86 //#undef ARCH_X86
  87 //#define DEBUG_BRIGHTNESS
  88 #include "postprocess.h"
  89 #include "postprocess_internal.h"
  90 #include "libavutil/avstring.h"
  91
  92 #include "libavutil/ffversion.h"
/* Version banner string for this library. FFMPEG_VERSION is presumably
 * provided by the libavutil/ffversion.h include directly above. */
const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
  94
  95 unsigned postproc_version(void)
  96 {
  97     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
  98     return LIBPOSTPROC_VERSION_INT;
  99 }
 100
 101 const char *postproc_configuration(void)
 102 {
 103     return FFMPEG_CONFIGURATION;
 104 }
 105
 106 const char *postproc_license(void)
 107 {
 108 #define LICENSE_PREFIX "libpostproc license: "
 109     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 110 }
 111
 112 #if HAVE_ALTIVEC_H
 113 #include <altivec.h>
 114 #endif
 115
/* Size in bytes of the scratch buffer that pp_get_mode_by_name_and_quality()
 * copies the mode string into. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array in
 * pp_get_mode_by_name_and_quality(); one slot is kept for the NULL terminator. */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the C filters in this file loop over. */
#define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably
 * used by the included templates -- confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 121
#if ARCH_X86 && HAVE_INLINE_ASM
/*
 * 64-bit constants, only declared for x86 builds with inline asm.
 *   wNN: the hex value NN repeated in each of the four 16-bit lanes
 *   bNN: the hex value NN repeated in each of the eight bytes
 * NOTE(review): the consumers are not in the visible part of this file;
 * presumably the inline-asm code in postprocess_template.c -- confirm.
 */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared for every architecture (it sits outside the x86 guard above).
 * NOTE(review): presumably the threshold of the dering filter implemented in
 * the templates -- confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 134
 135
/*
 * Table of all filters that can be named in a mode string.
 * pp_get_mode_by_name_and_quality() walks it until it reaches the entry whose
 * shortName is NULL and matches the user-supplied name against both the short
 * and the long name with strcmp() (case sensitive).
 *
 * NOTE(review): the initializers are positional; the columns are presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask}
 * -- confirm against the struct PPFilter declaration.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",              1, 0, 0, BITEXACT},
    {"vi", "visualize",             1, 0, 0, VISUALIZE},
    {NULL, NULL,0,0,0,0} //End Marker
};
 160
/*
 * Alias table, stored as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name against every
 * even-indexed entry and, on a match, splices the following odd-indexed
 * expansion into the mode string it is parsing. The scan stops at the NULL
 * found at an even index.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
 170
 171
 172 #if ARCH_X86 && HAVE_INLINE_ASM
 173 static inline void prefetchnta(const void *p)
 174 {
 175     __asm__ volatile(   "prefetchnta (%0)\n\t"
 176         : : "r" (p)
 177     );
 178 }
 179
 180 static inline void prefetcht0(const void *p)
 181 {
 182     __asm__ volatile(   "prefetcht0 (%0)\n\t"
 183         : : "r" (p)
 184     );
 185 }
 186
 187 static inline void prefetcht1(const void *p)
 188 {
 189     __asm__ volatile(   "prefetcht1 (%0)\n\t"
 190         : : "r" (p)
 191     );
 192 }
 193
 194 static inline void prefetcht2(const void *p)
 195 {
 196     __asm__ volatile(   "prefetcht2 (%0)\n\t"
 197         : : "r" (p)
 198     );
 199 }
 200 #endif
 201
 202 /* The horizontal functions exist only in C because the MMX
 203  * code is faster with vertical filters and transposing. */
 204
 205 /**
 206  * Check if the given 8x8 Block is mostly "flat"
 207  */
 208 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
 209 {
 210     int numEq= 0;
 211     int y;
 212     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 213     const int dcThreshold= dcOffset*2 + 1;
 214
 215     for(y=0; y<BLOCK_SIZE; y++){
 216         numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
 217         numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
 218         numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
 219         numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
 220         numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
 221         numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
 222         numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
 223         src+= stride;
 224     }
 225     return numEq > c->ppMode.flatnessThreshold;
 226 }
 227
 228 /**
 229  * Check if the middle 8x8 Block in the given 8x16 block is flat
 230  */
 231 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
 232 {
 233     int numEq= 0;
 234     int y;
 235     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 236     const int dcThreshold= dcOffset*2 + 1;
 237
 238     src+= stride*4; // src points to begin of the 8x8 Block
 239     for(y=0; y<BLOCK_SIZE-1; y++){
 240         numEq += ((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold;
 241         numEq += ((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold;
 242         numEq += ((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold;
 243         numEq += ((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold;
 244         numEq += ((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold;
 245         numEq += ((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold;
 246         numEq += ((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold;
 247         numEq += ((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold;
 248         src+= stride;
 249     }
 250     return numEq > c->ppMode.flatnessThreshold;
 251 }
 252
 253 static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
 254 {
 255     int i;
 256     for(i=0; i<2; i++){
 257         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 258         src += stride;
 259         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 260         src += stride;
 261         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 262         src += stride;
 263         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 264         src += stride;
 265     }
 266     return 1;
 267 }
 268
 269 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
 270 {
 271     int x;
 272     src+= stride*4;
 273     for(x=0; x<BLOCK_SIZE; x+=4){
 274         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 275         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 276         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 277         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 278     }
 279     return 1;
 280 }
 281
 282 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
 283 {
 284     if( isHorizDC_C(src, stride, c) ){
 285         return isHorizMinMaxOk_C(src, stride, c->QP);
 286     }else{
 287         return 2;
 288     }
 289 }
 290
 291 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
 292 {
 293     if( isVertDC_C(src, stride, c) ){
 294         return isVertMinMaxOk_C(src, stride, c->QP);
 295     }else{
 296         return 2;
 297     }
 298 }
 299
 300 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
 301 {
 302     int y;
 303     for(y=0; y<BLOCK_SIZE; y++){
 304         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 305
 306         if(FFABS(middleEnergy) < 8*c->QP){
 307             const int q=(dst[3] - dst[4])/2;
 308             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 309             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 310
 311             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 312             d= FFMAX(d, 0);
 313
 314             d= (5*d + 32) >> 6;
 315             d*= FFSIGN(-middleEnergy);
 316
 317             if(q>0)
 318             {
 319                 d = FFMAX(d, 0);
 320                 d = FFMIN(d, q);
 321             }
 322             else
 323             {
 324                 d = FFMIN(d, 0);
 325                 d = FFMAX(d, q);
 326             }
 327
 328             dst[3]-= d;
 329             dst[4]+= d;
 330         }
 331         dst+= stride;
 332     }
 333 }
 334
 335 /**
 336  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 337  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 338  */
 339 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
 340 {
 341     int y;
 342     for(y=0; y<BLOCK_SIZE; y++){
 343         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 344         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 345
 346         int sums[10];
 347         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 348         sums[1] = sums[0] - first  + dst[3];
 349         sums[2] = sums[1] - first  + dst[4];
 350         sums[3] = sums[2] - first  + dst[5];
 351         sums[4] = sums[3] - first  + dst[6];
 352         sums[5] = sums[4] - dst[0] + dst[7];
 353         sums[6] = sums[5] - dst[1] + last;
 354         sums[7] = sums[6] - dst[2] + last;
 355         sums[8] = sums[7] - dst[3] + last;
 356         sums[9] = sums[8] - dst[4] + last;
 357
 358         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 359         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 360         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 361         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 362         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 363         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 364         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 365         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 366
 367         dst+= stride;
 368     }
 369 }
 370
 371 /**
 372  * Experimental Filter 1 (Horizontal)
 373  * will not damage linear gradients
 374  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 375  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 376  * MMX2 version does correct clipping C version does not
 377  * not identical with the vertical one
 378  */
 379 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 380 {
 381     int y;
 382     static uint64_t lut[256];
 383     if(!lut[255])
 384     {
 385         int i;
 386         for(i=0; i<256; i++)
 387         {
 388             int v= i < 128 ? 2*i : 2*(i-256);
 389 /*
 390 //Simulate 112242211 9-Tap filter
 391             uint64_t a= (v/16)  & 0xFF;
 392             uint64_t b= (v/8)   & 0xFF;
 393             uint64_t c= (v/4)   & 0xFF;
 394             uint64_t d= (3*v/8) & 0xFF;
 395 */
 396 //Simulate piecewise linear interpolation
 397             uint64_t a= (v/16)   & 0xFF;
 398             uint64_t b= (v*3/16) & 0xFF;
 399             uint64_t c= (v*5/16) & 0xFF;
 400             uint64_t d= (7*v/16) & 0xFF;
 401             uint64_t A= (0x100 - a)&0xFF;
 402             uint64_t B= (0x100 - b)&0xFF;
 403             uint64_t C= (0x100 - c)&0xFF;
 404             uint64_t D= (0x100 - c)&0xFF;
 405
 406             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 407                        (D<<24) | (C<<16) | (B<<8)  | (A);
 408             //lut[i] = (v<<32) | (v<<24);
 409         }
 410     }
 411
 412     for(y=0; y<BLOCK_SIZE; y++){
 413         int a= src[1] - src[2];
 414         int b= src[3] - src[4];
 415         int c= src[5] - src[6];
 416
 417         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 418
 419         if(d < QP){
 420             int v = d * FFSIGN(-b);
 421
 422             src[1] +=v/8;
 423             src[2] +=v/4;
 424             src[3] +=3*v/8;
 425             src[4] -=3*v/8;
 426             src[5] -=v/4;
 427             src[6] -=v/8;
 428         }
 429         src+=stride;
 430     }
 431 }
 432
 433 /**
 434  * accurate deblock filter
 435  */
 436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
 437                                             int stride, const PPContext *c, int mode)
 438 {
 439     int y;
 440     const int QP= c->QP;
 441     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 442     const int dcThreshold= dcOffset*2 + 1;
 443 //START_TIMER
 444     src+= step*4; // src points to begin of the 8x8 Block
 445     for(y=0; y<8; y++){
 446         int numEq= 0;
 447
 448         numEq += ((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold;
 449         numEq += ((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold;
 450         numEq += ((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold;
 451         numEq += ((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold;
 452         numEq += ((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold;
 453         numEq += ((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold;
 454         numEq += ((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold;
 455         numEq += ((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold;
 456         numEq += ((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold;
 457         if(numEq > c->ppMode.flatnessThreshold){
 458             int min, max, x;
 459
 460             if(src[0] > src[step]){
 461                 max= src[0];
 462                 min= src[step];
 463             }else{
 464                 max= src[step];
 465                 min= src[0];
 466             }
 467             for(x=2; x<8; x+=2){
 468                 if(src[x*step] > src[(x+1)*step]){
 469                         if(src[x    *step] > max) max= src[ x   *step];
 470                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 471                 }else{
 472                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 473                         if(src[ x   *step] < min) min= src[ x   *step];
 474                 }
 475             }
 476             if(max-min < 2*QP){
 477                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 478                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 479
 480                 int sums[10];
 481                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 482                 sums[1] = sums[0] - first       + src[3*step];
 483                 sums[2] = sums[1] - first       + src[4*step];
 484                 sums[3] = sums[2] - first       + src[5*step];
 485                 sums[4] = sums[3] - first       + src[6*step];
 486                 sums[5] = sums[4] - src[0*step] + src[7*step];
 487                 sums[6] = sums[5] - src[1*step] + last;
 488                 sums[7] = sums[6] - src[2*step] + last;
 489                 sums[8] = sums[7] - src[3*step] + last;
 490                 sums[9] = sums[8] - src[4*step] + last;
 491
 492                 if (mode & VISUALIZE) {
 493                     src[0*step] =
 494                     src[1*step] =
 495                     src[2*step] =
 496                     src[3*step] =
 497                     src[4*step] =
 498                     src[5*step] =
 499                     src[6*step] =
 500                     src[7*step] = 128;
 501                 }
 502                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 503                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 504                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 505                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 506                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 507                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 508                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 509                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 510             }
 511         }else{
 512             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 513
 514             if(FFABS(middleEnergy) < 8*QP){
 515                 const int q=(src[3*step] - src[4*step])/2;
 516                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 517                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 518
 519                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 520                 d= FFMAX(d, 0);
 521
 522                 d= (5*d + 32) >> 6;
 523                 d*= FFSIGN(-middleEnergy);
 524
 525                 if(q>0){
 526                     d = FFMAX(d, 0);
 527                     d = FFMIN(d, q);
 528                 }else{
 529                     d = FFMIN(d, 0);
 530                     d = FFMAX(d, q);
 531                 }
 532
 533                 if ((mode & VISUALIZE) && d) {
 534                     d= (d < 0) ? 32 : -32;
 535                     src[3*step]= av_clip_uint8(src[3*step] - d);
 536                     src[4*step]= av_clip_uint8(src[4*step] + d);
 537                     d = 0;
 538                 }
 539
 540                 src[3*step]-= d;
 541                 src[4*step]+= d;
 542             }
 543         }
 544
 545         src += stride;
 546     }
 547 /*if(step==16){
 548     STOP_TIMER("step16")
 549 }else{
 550     STOP_TIMER("stepX")
 551 }*/
 552 }
 553
// Note: there are C, MMX, MMX2 and 3DNow versions (plus the AltiVec and SSE2
// templates selected below); there is no combined 3DNow+MMX2 version.
// Plain C versions:
// the C code is always compiled, because testing needs bit-exact output.
 557 #define TEMPLATE_PP_C 1
 558 #include "postprocess_template.c"
 559
 560 #if HAVE_ALTIVEC
 561 #   define TEMPLATE_PP_ALTIVEC 1
 562 #   include "postprocess_altivec_template.c"
 563 #   include "postprocess_template.c"
 564 #endif
 565
 566 #if ARCH_X86 && HAVE_INLINE_ASM
 567 #    if CONFIG_RUNTIME_CPUDETECT
 568 #        define TEMPLATE_PP_MMX 1
 569 #        include "postprocess_template.c"
 570 #        define TEMPLATE_PP_MMXEXT 1
 571 #        include "postprocess_template.c"
 572 #        define TEMPLATE_PP_3DNOW 1
 573 #        include "postprocess_template.c"
 574 #        define TEMPLATE_PP_SSE2 1
 575 #        include "postprocess_template.c"
 576 #    else
 577 #        if HAVE_SSE2_INLINE
 578 #            define TEMPLATE_PP_SSE2 1
 579 #            include "postprocess_template.c"
 580 #        elif HAVE_MMXEXT_INLINE
 581 #            define TEMPLATE_PP_MMXEXT 1
 582 #            include "postprocess_template.c"
 583 #        elif HAVE_AMD3DNOW_INLINE
 584 #            define TEMPLATE_PP_3DNOW 1
 585 #            include "postprocess_template.c"
 586 #        elif HAVE_MMX_INLINE
 587 #            define TEMPLATE_PP_MMX 1
 588 #            include "postprocess_template.c"
 589 #        endif
 590 #    endif
 591 #endif
 592
 593 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 594                       const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
 595
 596 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 597         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
 598 {
 599     pp_fn pp = postProcess_C;
 600     PPContext *c= (PPContext *)vc;
 601     PPMode *ppMode= (PPMode *)vm;
 602     c->ppMode= *ppMode; //FIXME
 603
 604     if (!(ppMode->lumMode & BITEXACT)) {
 605 #if CONFIG_RUNTIME_CPUDETECT
 606 #if ARCH_X86 && HAVE_INLINE_ASM
 607         // ordered per speed fastest first
 608         if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
 609         else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
 610         else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
 611         else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
 612 #elif HAVE_ALTIVEC
 613         if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
 614 #endif
 615 #else /* CONFIG_RUNTIME_CPUDETECT */
 616 #if     HAVE_SSE2_INLINE
 617         pp = postProcess_SSE2;
 618 #elif   HAVE_MMXEXT_INLINE
 619         pp = postProcess_MMX2;
 620 #elif HAVE_AMD3DNOW_INLINE
 621         pp = postProcess_3DNow;
 622 #elif HAVE_MMX_INLINE
 623         pp = postProcess_MMX;
 624 #elif HAVE_ALTIVEC
 625         pp = postProcess_altivec;
 626 #endif
 627 #endif /* !CONFIG_RUNTIME_CPUDETECT */
 628     }
 629
 630     pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 631 }
 632
 633 /* -pp Command line Help
 634 */
 635 const char pp_help[] =
 636 "Available postprocessing filters:\n"
 637 "Filters                        Options\n"
 638 "short  long name       short   long option     Description\n"
 639 "*      *               a       autoq           CPU power dependent enabler\n"
 640 "                       c       chrom           chrominance filtering enabled\n"
 641 "                       y       nochrom         chrominance filtering disabled\n"
 642 "                       n       noluma          luma filtering disabled\n"
 643 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 644 "       1. difference factor: default=32, higher -> more deblocking\n"
 645 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 646 "                       the h & v deblocking filters share these\n"
 647 "                       so you can't set different thresholds for h / v\n"
 648 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 649 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 650 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 651 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 652 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 653 "dr     dering                                  deringing filter\n"
 654 "al     autolevels                              automatic brightness / contrast\n"
 655 "                       f        fullyrange     stretch luminance to (0..255)\n"
 656 "lb     linblenddeint                           linear blend deinterlacer\n"
 657 "li     linipoldeint                            linear interpolating deinterlace\n"
 658 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 659 "md     mediandeint                             median deinterlacer\n"
 660 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 661 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 662 "de     default                                 hb:a,vb:a,dr:a\n"
 663 "fa     fast                                    h1:a,v1:a,dr:a\n"
 664 "ac                                             ha:a:128:7,va:a,dr:a\n"
 665 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 666 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 667 "fq     forceQuant      <quantizer>             force quantizer\n"
 668 "Usage:\n"
 669 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 670 "long form example:\n"
 671 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 672 "short form example:\n"
 673 "vb:a/hb:a/lb                                   de,-vb\n"
 674 "more examples:\n"
 675 "tn:64:128:256\n"
 676 "\n"
 677 ;
 678
 679 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 680 {
 681     char temp[GET_MODE_BUFFER_SIZE];
 682     char *p= temp;
 683     static const char filterDelimiters[] = ",/";
 684     static const char optionDelimiters[] = ":|";
 685     struct PPMode *ppMode;
 686     char *filterToken;
 687
 688     if (!name)  {
 689         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
 690         return NULL;
 691     }
 692
 693     if (!strcmp(name, "help")) {
 694         const char *p;
 695         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
 696             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
 697             av_log(NULL, AV_LOG_INFO, "%s", temp);
 698         }
 699         return NULL;
 700     }
 701
 702     ppMode= av_malloc(sizeof(PPMode));
 703     if (!ppMode)
 704         return NULL;
 705
 706     ppMode->lumMode= 0;
 707     ppMode->chromMode= 0;
 708     ppMode->maxTmpNoise[0]= 700;
 709     ppMode->maxTmpNoise[1]= 1500;
 710     ppMode->maxTmpNoise[2]= 3000;
 711     ppMode->maxAllowedY= 234;
 712     ppMode->minAllowedY= 16;
 713     ppMode->baseDcDiff= 256/8;
 714     ppMode->flatnessThreshold= 56-16-1;
 715     ppMode->maxClippedThreshold= 0.01;
 716     ppMode->error=0;
 717
 718     memset(temp, 0, GET_MODE_BUFFER_SIZE);
 719     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
 720
 721     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 722
 723     for(;;){
 724         const char *filterName;
 725         int q= 1000000; //PP_QUALITY_MAX;
 726         int chrom=-1;
 727         int luma=-1;
 728         const char *option;
 729         const char *options[OPTIONS_ARRAY_SIZE];
 730         int i;
 731         int filterNameOk=0;
 732         int numOfUnknownOptions=0;
 733         int enable=1; //does the user want us to enabled or disabled the filter
 734         char *tokstate;
 735
 736         filterToken= av_strtok(p, filterDelimiters, &tokstate);
 737         if(!filterToken) break;
 738         p+= strlen(filterToken) + 1; // p points to next filterToken
 739         filterName= av_strtok(filterToken, optionDelimiters, &tokstate);
 740         if (!filterName) {
 741             ppMode->error++;
 742             break;
 743         }
 744         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 745
 746         if(*filterName == '-'){
 747             enable=0;
 748             filterName++;
 749         }
 750
 751         for(;;){ //for all options
 752             option= av_strtok(NULL, optionDelimiters, &tokstate);
 753             if(!option) break;
 754
 755             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 756             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 757             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 758             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 759             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 760             else{
 761                 options[numOfUnknownOptions] = option;
 762                 numOfUnknownOptions++;
 763             }
 764             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 765         }
 766         options[numOfUnknownOptions] = NULL;
 767
 768         /* replace stuff from the replace Table */
 769         for(i=0; replaceTable[2*i]; i++){
 770             if(!strcmp(replaceTable[2*i], filterName)){
 771                 int newlen= strlen(replaceTable[2*i + 1]);
 772                 int plen;
 773                 int spaceLeft;
 774
 775                 p--, *p=',';
 776
 777                 plen= strlen(p);
 778                 spaceLeft= p - temp + plen;
 779                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
 780                     ppMode->error++;
 781                     break;
 782                 }
 783                 memmove(p + newlen, p, plen+1);
 784                 memcpy(p, replaceTable[2*i + 1], newlen);
 785                 filterNameOk=1;
 786             }
 787         }
 788
 789         for(i=0; filters[i].shortName; i++){
 790             if(   !strcmp(filters[i].longName, filterName)
 791                || !strcmp(filters[i].shortName, filterName)){
 792                 ppMode->lumMode &= ~filters[i].mask;
 793                 ppMode->chromMode &= ~filters[i].mask;
 794
 795                 filterNameOk=1;
 796                 if(!enable) break; // user wants to disable it
 797
 798                 if(q >= filters[i].minLumQuality && luma)
 799                     ppMode->lumMode|= filters[i].mask;
 800                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 801                     if(q >= filters[i].minChromQuality)
 802                             ppMode->chromMode|= filters[i].mask;
 803
 804                 if(filters[i].mask == LEVEL_FIX){
 805                     int o;
 806                     ppMode->minAllowedY= 16;
 807                     ppMode->maxAllowedY= 234;
 808                     for(o=0; options[o]; o++){
 809                         if(  !strcmp(options[o],"fullyrange")
 810                            ||!strcmp(options[o],"f")){
 811                             ppMode->minAllowedY= 0;
 812                             ppMode->maxAllowedY= 255;
 813                             numOfUnknownOptions--;
 814                         }
 815                     }
 816                 }
 817                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 818                 {
 819                     int o;
 820                     int numOfNoises=0;
 821
 822                     for(o=0; options[o]; o++){
 823                         char *tail;
 824                         ppMode->maxTmpNoise[numOfNoises]=
 825                             strtol(options[o], &tail, 0);
 826                         if(tail!=options[o]){
 827                             numOfNoises++;
 828                             numOfUnknownOptions--;
 829                             if(numOfNoises >= 3) break;
 830                         }
 831                     }
 832                 }
 833                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 834                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 835                     int o;
 836
 837                     for(o=0; options[o] && o<2; o++){
 838                         char *tail;
 839                         int val= strtol(options[o], &tail, 0);
 840                         if(tail==options[o]) break;
 841
 842                         numOfUnknownOptions--;
 843                         if(o==0) ppMode->baseDcDiff= val;
 844                         else ppMode->flatnessThreshold= val;
 845                     }
 846                 }
 847                 else if(filters[i].mask == FORCE_QUANT){
 848                     int o;
 849                     ppMode->forcedQuant= 15;
 850
 851                     for(o=0; options[o] && o<1; o++){
 852                         char *tail;
 853                         int val= strtol(options[o], &tail, 0);
 854                         if(tail==options[o]) break;
 855
 856                         numOfUnknownOptions--;
 857                         ppMode->forcedQuant= val;
 858                     }
 859                 }
 860             }
 861         }
 862         if(!filterNameOk) ppMode->error++;
 863         ppMode->error += numOfUnknownOptions;
 864     }
 865
 866     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 867     if(ppMode->error){
 868         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 869         av_free(ppMode);
 870         return NULL;
 871     }
 872     return ppMode;
 873 }
 874
 875 void pp_free_mode(pp_mode *mode){
 876     av_free(mode);
 877 }
 878
 879 static void reallocAlign(void **p, int size){
 880     av_free(*p);
 881     *p= av_mallocz(size);
 882 }
 883
 884 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 885     int mbWidth = (width+15)>>4;
 886     int mbHeight= (height+15)>>4;
 887     int i;
 888
 889     c->stride= stride;
 890     c->qpStride= qpStride;
 891
 892     reallocAlign((void **)&c->tempDst, stride*24+32);
 893     reallocAlign((void **)&c->tempSrc, stride*24);
 894     reallocAlign((void **)&c->tempBlocks, 2*16*8);
 895     reallocAlign((void **)&c->yHistogram, 256*sizeof(uint64_t));
 896     for(i=0; i<256; i++)
 897             c->yHistogram[i]= width*height/64*15/256;
 898
 899     for(i=0; i<3; i++){
 900         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
 901         reallocAlign((void **)&c->tempBlurred[i], stride*mbHeight*16 + 17*1024);
 902         reallocAlign((void **)&c->tempBlurredPast[i], 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 903     }
 904
 905     reallocAlign((void **)&c->deintTemp, 2*width+32);
 906     reallocAlign((void **)&c->nonBQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 907     reallocAlign((void **)&c->stdQPTable, qpStride*mbHeight*sizeof(QP_STORE_T));
 908     reallocAlign((void **)&c->forcedQPTable, mbWidth*sizeof(QP_STORE_T));
 909 }
 910
 911 static const char * context_to_name(void * ptr) {
 912     return "postproc";
 913 }
 914
 915 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 916
 917 pp_context *pp_get_context(int width, int height, int cpuCaps){
 918     PPContext *c= av_mallocz(sizeof(PPContext));
 919     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 920     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 921
 922     if (!c)
 923         return NULL;
 924
 925     c->av_class = &av_codec_context_class;
 926     if(cpuCaps&PP_FORMAT){
 927         c->hChromaSubSample= cpuCaps&0x3;
 928         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 929     }else{
 930         c->hChromaSubSample= 1;
 931         c->vChromaSubSample= 1;
 932     }
 933     if (cpuCaps & PP_CPU_CAPS_AUTO) {
 934         c->cpuCaps = av_get_cpu_flags();
 935     } else {
 936         c->cpuCaps = 0;
 937         if (cpuCaps & PP_CPU_CAPS_MMX)      c->cpuCaps |= AV_CPU_FLAG_MMX;
 938         if (cpuCaps & PP_CPU_CAPS_MMX2)     c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
 939         if (cpuCaps & PP_CPU_CAPS_3DNOW)    c->cpuCaps |= AV_CPU_FLAG_3DNOW;
 940         if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
 941     }
 942
 943     reallocBuffers(c, width, height, stride, qpStride);
 944
 945     c->frameNum=-1;
 946
 947     return c;
 948 }
 949
 950 void pp_free_context(void *vc){
 951     PPContext *c = (PPContext*)vc;
 952     int i;
 953
 954     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurred); i++)
 955         av_free(c->tempBlurred[i]);
 956     for(i=0; i<FF_ARRAY_ELEMS(c->tempBlurredPast); i++)
 957         av_free(c->tempBlurredPast[i]);
 958
 959     av_free(c->tempBlocks);
 960     av_free(c->yHistogram);
 961     av_free(c->tempDst);
 962     av_free(c->tempSrc);
 963     av_free(c->deintTemp);
 964     av_free(c->stdQPTable);
 965     av_free(c->nonBQPTable);
 966     av_free(c->forcedQPTable);
 967
 968     memset(c, 0, sizeof(PPContext));
 969
 970     av_free(c);
 971 }
 972
 973 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 974                      uint8_t * dst[3], const int dstStride[3],
 975                      int width, int height,
 976                      const QP_STORE_T *QP_store,  int QPStride,
 977                      pp_mode *vm,  void *vc, int pict_type)
 978 {
 979     int mbWidth = (width+15)>>4;
 980     int mbHeight= (height+15)>>4;
 981     PPMode *mode = vm;
 982     PPContext *c = vc;
 983     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
 984     int absQPStride = FFABS(QPStride);
 985
 986     // c->stride and c->QPStride are always positive
 987     if(c->stride < minStride || c->qpStride < absQPStride)
 988         reallocBuffers(c, width, height,
 989                        FFMAX(minStride, c->stride),
 990                        FFMAX(c->qpStride, absQPStride));
 991
 992     if(!QP_store || (mode->lumMode & FORCE_QUANT)){
 993         int i;
 994         QP_store= c->forcedQPTable;
 995         absQPStride = QPStride = 0;
 996         if(mode->lumMode & FORCE_QUANT)
 997             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
 998         else
 999             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1000     }
1001
1002     if(pict_type & PP_PICT_TYPE_QP2){
1003         int i;
1004         const int count= FFMAX(mbHeight * absQPStride, mbWidth);
1005         for(i=0; i<(count>>2); i++){
1006             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1007         }
1008         for(i<<=2; i<count; i++){
1009             c->stdQPTable[i] = QP_store[i]>>1;
1010         }
1011         QP_store= c->stdQPTable;
1012         QPStride= absQPStride;
1013     }
1014
1015     if(0){
1016         int x,y;
1017         for(y=0; y<mbHeight; y++){
1018             for(x=0; x<mbWidth; x++){
1019                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1020             }
1021             av_log(c, AV_LOG_INFO, "\n");
1022         }
1023         av_log(c, AV_LOG_INFO, "\n");
1024     }
1025
1026     if((pict_type&7)!=3){
1027         if (QPStride >= 0){
1028             int i;
1029             const int count= FFMAX(mbHeight * QPStride, mbWidth);
1030             for(i=0; i<(count>>2); i++){
1031                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1032             }
1033             for(i<<=2; i<count; i++){
1034                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1035             }
1036         } else {
1037             int i,j;
1038             for(i=0; i<mbHeight; i++) {
1039                 for(j=0; j<absQPStride; j++) {
1040                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1041                 }
1042             }
1043         }
1044     }
1045
1046     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1047            mode->lumMode, mode->chromMode);
1048
1049     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1050                 width, height, QP_store, QPStride, 0, mode, c);
1051
1052     if (!(src[1] && src[2] && dst[1] && dst[2]))
1053         return;
1054
1055     width  = (width )>>c->hChromaSubSample;
1056     height = (height)>>c->vChromaSubSample;
1057
1058     if(mode->chromMode){
1059         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1060                     width, height, QP_store, QPStride, 1, mode, c);
1061         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1062                     width, height, QP_store, QPStride, 2, mode, c);
1063     }
1064     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1065         linecpy(dst[1], src[1], height, srcStride[1]);
1066         linecpy(dst[2], src[2], height, srcStride[2]);
1067     }else{
1068         int y;
1069         for(y=0; y<height; y++){
1070             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1071             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1072         }
1073     }
1074 }