OSDN Git Service

Merge remote-tracking branch 'qatar/master'
author: Michael Niedermayer <michaelni@gmx.at>
Wed, 29 Jun 2011 02:08:31 +0000 (04:08 +0200)
committer: Michael Niedermayer <michaelni@gmx.at>
Wed, 29 Jun 2011 03:23:12 +0000 (05:23 +0200)
* qatar/master: (21 commits)
  swscale: Add Doxygen for hyscale_fast/hScale.
  fate: enable lavfi-pixmt tests on big endian systems
  PPC: swscale: disable altivec functions for unsupported formats
  fate: merge identical pixdesc_be/le tests
  swscale: Add Doxygen for yuv2planar*/yuv2packed* functions.
  build: call texi2pod.pl with full path instead of symlink
  build: include sub-makefiles using full path instead of symlinks
  swscale: update big endian reference values after dff5a835.
  wavpack: skip blocks with no samples
  cosmetics: remove outdated comment that is no longer true
  build: replace some addprefix/addsuffix with substitution refs
  avutil: Remove unused arbitrary precision integer code.
  configure: Drop check for availability of ten assembler operands.
  aacenc: Save channel configuration for later use.
  aacenc: Fix codebook trellising for zeroed bands.
  swscale: change prototypes of scaled YUV output functions.
  swscale: re-add support for non-native endianness.
  swscale: disentangle yuv2rgbX_c_full() into small functions.
  swscale: split yuv2packed[12X]_c() remainders into small functions.
  swscale: split yuv2packedX_altivec in smaller functions.
  ...

Conflicts:
    Makefile
    configure
    libavcodec/x86/dsputil_mmx.c
    libavfilter/Makefile
    libavformat/Makefile
    libavutil/integer.c
    libavutil/integer.h
    libswscale/swscale.c
    libswscale/swscale_internal.h
    libswscale/x86/swscale_template.c
    tests/ref/lavfi/pixdesc_le
    tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>
33 files changed:
1  2 
Makefile
common.mak
configure
libavcodec/Makefile
libavcodec/aaccoder.c
libavcodec/aacenc.c
libavcodec/aacenc.h
libavcodec/ac3enc.c
libavcodec/utils.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/mlpdsp.c
libavdevice/Makefile
libavfilter/Makefile
libavformat/Makefile
libavformat/wv.c
libavutil/Makefile
libavutil/mathematics.c
libpostproc/Makefile
libswscale/Makefile
libswscale/ppc/swscale_altivec.c
libswscale/ppc/yuv2rgb_altivec.c
libswscale/ppc/yuv2rgb_altivec.h
libswscale/swscale.c
libswscale/swscale_internal.h
libswscale/utils.c
libswscale/x86/swscale_template.c
subdir.mak
tests/lavfi-regression.sh
tests/ref/lavfi/pixdesc
tests/ref/lavfi/pixfmts_copy
tests/ref/lavfi/pixfmts_null
tests/ref/lavfi/pixfmts_scale
tests/ref/lavfi/pixfmts_vflip

diff --cc Makefile
+++ b/Makefile
@@@ -39,8 -79,7 +39,8 @@@ DATA_FILES := $(wildcard $(SRC_PATH)/ff
  
  SKIPHEADERS = cmdutils_common_opts.h
  
- include common.mak
 +MAIN_MAKEFILE=1
+ include $(SRC_PATH)/common.mak
  
  FF_LDFLAGS   := $(FFLDFLAGS)
  FF_EXTRALIBS := $(FFEXTRALIBS)
@@@ -143,12 -178,20 +143,12 @@@ distclean:
        $(RM) config.* .version version.h libavutil/avconfig.h
  
  config:
 -      $(SRC_PATH)/configure $(value LIBAV_CONFIGURATION)
 +      $(SRC_PATH)/configure $(value FFMPEG_CONFIGURATION)
  
 -check: test checkheaders
 +check: test
  
- include doc/Makefile
- include tests/Makefile
+ include $(SRC_PATH)/doc/Makefile
+ include $(SRC_PATH)/tests/Makefile
  
 -# Dummy rule to stop make trying to rebuild removed or renamed headers
 -%.h:
 -      @:
 -
 -# Disable suffix rules.  Most of the builtin rules are suffix rules,
 -# so this saves some time on slow systems.
 -.SUFFIXES:
 -
  .PHONY: all alltools *clean check config examples install*
  .PHONY: testprogs uninstall*
diff --cc common.mak
Simple merge
diff --cc configure
+++ b/configure
@@@ -1125,8 -1120,6 +1125,7 @@@ HAVE_LIST=
      sys_select_h
      sys_soundcard_h
      sys_videoio_h
-     ten_operands
 +    termios_h
      threads
      trunc
      truncf
@@@ -3300,41 -3196,12 +3272,16 @@@ if enabled source_path_used; the
          libswscale
          libswscale/$arch
          tests
 +        tests/fate
          tools
      "
-     FILES="
-         Makefile
-         common.mak
-         subdir.mak
-         doc/Makefile
-         doc/texi2pod.pl
-         libavcodec/Makefile
-         libavcodec/${arch}/Makefile
-         libavdevice/Makefile
-         libavfilter/Makefile
-         libavfilter/${arch}/Makefile
-         libavformat/Makefile
-         libavutil/Makefile
-         libpostproc/Makefile
-         libswscale/Makefile
-         tests/Makefile
-         tests/fate.mak
-         tests/fate2.mak
-         tests/fate/aac.mak
-         tests/fate/als.mak
-         tests/fate/fft.mak
-         tests/fate/h264.mak
-         tests/fate/mp3.mak
-         tests/fate/vorbis.mak
-         tests/fate/vp8.mak
-     "
++
      map 'mkdir -p $v' $DIRS;
-     map 'test -f "$source_path/$v" && $ln_s "$source_path/$v" $v' $FILES
+     $ln_s "$source_path/Makefile" .
  fi
  
 +enabled stripping || strip="echo skipping strip"
 +
  config_files="$TMPH config.mak"
  
  cat > config.mak <<EOF
Simple merge
Simple merge
@@@ -521,11 -504,10 +521,11 @@@ static int aac_encode_frame(AVCodecCont
          } else {
              start_ch = 0;
              samples2 = s->samples + 1024 * avctx->channels;
-             for (i = 0; i < chan_map[0]; i++) {
-                 tag = chan_map[i+1];
+             for (i = 0; i < s->chan_map[0]; i++) {
+                 tag = s->chan_map[i+1];
                  chans = tag == TYPE_CPE ? 2 : 1;
 -                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch,
 +                ff_psy_preprocess(s->psypp,
 +                                  (uint16_t*)data + channel_maps[avctx->channels-1][start_ch],
                                    samples2 + start_ch, start_ch, chans);
                  start_ch += chans;
              }
Simple merge
Simple merge
Simple merge
@@@ -580,7 -580,29 +580,7 @@@ static void add_bytes_mmx(uint8_t *dst
          dst[i+0] += src[i+0];
  }
  
- #if HAVE_7REGS && HAVE_TEN_OPERANDS
 -static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
 -    x86_reg i=0;
 -    __asm__ volatile(
 -        "jmp 2f                         \n\t"
 -        "1:                             \n\t"
 -        "movq   (%2, %0), %%mm0         \n\t"
 -        "movq  8(%2, %0), %%mm1         \n\t"
 -        "paddb  (%3, %0), %%mm0         \n\t"
 -        "paddb 8(%3, %0), %%mm1         \n\t"
 -        "movq %%mm0,  (%1, %0)          \n\t"
 -        "movq %%mm1, 8(%1, %0)          \n\t"
 -        "add $16, %0                    \n\t"
 -        "2:                             \n\t"
 -        "cmp %4, %0                     \n\t"
 -        " js 1b                         \n\t"
 -        : "+r" (i)
 -        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
 -    );
 -    for(; i<w; i++)
 -        dst[i] = src1[i] + src2[i];
 -}
 -
+ #if HAVE_7REGS
  static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) {
      x86_reg w2 = -w;
      x86_reg x;
Simple merge
Simple merge
@@@ -68,77 -55,11 +68,77 @@@ OBJS-$(CONFIG_COLOR_FILTER
  OBJS-$(CONFIG_FREI0R_SRC_FILTER)             += vf_frei0r.o
  OBJS-$(CONFIG_MOVIE_FILTER)                  += vsrc_movie.o
  OBJS-$(CONFIG_NULLSRC_FILTER)                += vsrc_nullsrc.o
 +OBJS-$(CONFIG_TESTSRC_FILTER)                += vsrc_testsrc.o
  
 +OBJS-$(CONFIG_BUFFERSINK_FILTER)             += vsink_buffer.o
  OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
  
- -include $(SUBDIR)$(ARCH)/Makefile
 +
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/mp_image.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/img_format.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_2xsai.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_blackframe.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_boxblur.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_cropdetect.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_decimate.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_delogo.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_denoise3d.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_detc.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_dint.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_divtc.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_down3dright.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_dsize.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_eq2.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_eq.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_field.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_fil.o
 +#OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_filmdint.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_fixpts.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_framestep.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_fspp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_geq.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_gradfun.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_harddup.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_hqdn3d.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_hue.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_il.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_ilpack.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_ivtc.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_kerndeint.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_mcdeint.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_mirror.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_noise.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_ow.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_palette.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_perspective.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_phase.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_pp7.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_pullup.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_qp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_rectangle.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_remove_logo.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_rgbtest.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_rotate.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_sab.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_screenshot.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_smartblur.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_softpulldown.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_softskip.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_spp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_swapuv.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_telecine.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_test.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_tile.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_tinterlace.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_unsharp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_uspp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_yuvcsp.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_yvu9.o
 +OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/pullup.o
 +
 +
+ -include $(SRC_PATH)/$(SUBDIR)$(ARCH)/Makefile
  
 -DIRS = x86
 +DIRS = x86 libmpcodecs
  
- include $(SUBDIR)../subdir.mak
+ include $(SRC_PATH)/subdir.mak
@@@ -340,6 -333,9 +340,6 @@@ OBJS-$(CONFIG_UDP_PROTOCOL
  # libavdevice dependencies
  OBJS-$(CONFIG_JACK_INDEV)                += timefilter.o
  
 -EXAMPLES  = output
  TESTPROGS = timefilter
  
- include $(SUBDIR)../subdir.mak
+ include $(SRC_PATH)/subdir.mak
 -
 -$(SUBDIR)output-example$(EXESUF): ELIBS = -lswscale
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -385,14 -287,15 +382,16 @@@ static void yuv2yuvX_c(SwsContext *c, c
                         const int16_t *chrFilter, const int16_t **chrUSrc,
                         const int16_t **chrVSrc,
                         int chrFilterSize, const int16_t **alpSrc,
-                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                        uint8_t *aDest, int dstW, int chrDstW,
 -                       uint8_t *dest[4], int dstW, int chrDstW)
++                       uint8_t *dest[4], int dstW, int chrDstW,
 +                       const uint8_t *lumDither, const uint8_t *chrDither)
  {
-     //FIXME Optimize (just quickly written not optimized..)
+     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
      int i;
+     //FIXME Optimize (just quickly written not optimized..)
      for (i=0; i<dstW; i++) {
 -        int val=1<<18;
 +        int val = lumDither[i&7] << 12;
          int j;
          for (j=0; j<lumFilterSize; j++)
              val += lumSrc[j][i] * lumFilter[j];
  }
  
  static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                               const int16_t *chrUSrc, const int16_t *chrVSrc,
-                               const int16_t *alpSrc,
-                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
+                        const int16_t *chrUSrc, const int16_t *chrVSrc,
+                        const int16_t *alpSrc,
 -                       uint8_t *dest[4], int dstW, int chrDstW)
++                       uint8_t *dest[4], int dstW, int chrDstW,
++                       const uint8_t *lumDither, const uint8_t *chrDither)
  {
+     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
      int i;
  
      for (i=0; i<dstW; i++) {
 -        int val= (lumSrc[i]+64)>>7;
 +        int val= (lumSrc[i]+lumDither[i&7])>>7;
-         dest[i]= av_clip_uint8(val);
+         yDest[i]= av_clip_uint8(val);
      }
  
      if (uDest)
@@@ -457,11 -361,10 +458,11 @@@ static void yuv2nv12X_c(SwsContext *c, 
                          const int16_t **lumSrc, int lumFilterSize,
                          const int16_t *chrFilter, const int16_t **chrUSrc,
                          const int16_t **chrVSrc, int chrFilterSize,
-                         const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
-                         uint8_t *vDest, uint8_t *aDest,
+                         const int16_t **alpSrc, uint8_t *dest[4],
 -                        int dstW, int chrDstW)
 +                        int dstW, int chrDstW,
 +                        const uint8_t *lumDither, const uint8_t *chrDither)
  {
+     uint8_t *yDest = dest[0], *uDest = dest[1];
      enum PixelFormat dstFormat = c->dstFormat;
  
      //FIXME Optimize (just quickly written not optimized..)
@@@ -1395,9 -1097,186 +1195,185 @@@ yuv2rgb_1_c_template(SwsContext *c, con
      int i;
  
      if (uvalpha < 2048) {
-         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
+         for (i = 0; i < (dstW >> 1); i++) {
+             int Y1 = buf0[i * 2]     >> 7;
+             int Y2 = buf0[i * 2 + 1] >> 7;
+             int U  = ubuf1[i]        >> 7;
+             int V  = vbuf1[i]        >> 7;
+             int A1, A2;
+             const void *r =  c->table_rV[V],
+                        *g = (c->table_gU[U] + c->table_gV[V]),
+                        *b =  c->table_bU[U];
+             if (hasAlpha) {
+                 A1 = abuf0[i * 2    ] >> 7;
+                 A2 = abuf0[i * 2 + 1] >> 7;
+             }
+             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                           r, g, b, y, target, hasAlpha);
+         }
      } else {
-         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
+         for (i = 0; i < (dstW >> 1); i++) {
+             int Y1 =  buf0[i * 2]          >> 7;
+             int Y2 =  buf0[i * 2 + 1]      >> 7;
+             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
+             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
+             int A1, A2;
+             const void *r =  c->table_rV[V],
+                        *g = (c->table_gU[U] + c->table_gV[V]),
+                        *b =  c->table_bU[U];
+             if (hasAlpha) {
+                 A1 = abuf0[i * 2    ] >> 7;
+                 A2 = abuf0[i * 2 + 1] >> 7;
+             }
+             yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                           r, g, b, y, target, hasAlpha);
+         }
+     }
+ }
+ #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
+ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
+                                 const int16_t **lumSrc, int lumFilterSize, \
+                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
+                                 const int16_t **chrVSrc, int chrFilterSize, \
+                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
+                                 int y) \
+ { \
+     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
+                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
+ }
+ #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+ YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
+ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
+                                 int yalpha, int uvalpha, int y) \
+ { \
+     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
+                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
+ } \
+  \
+ static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
+                                 int uvalpha, int y) \
+ { \
+     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+                                   dstW, uvalpha, y, fmt, hasAlpha); \
+ }
+ #if CONFIG_SMALL
+ YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+ YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
+ #else
+ #if CONFIG_SWSCALE_ALPHA
+ YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1);
+ YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1);
+ #endif
+ YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0);
+ YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0);
+ #endif
+ YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0);
+ YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0);
+ YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0);
+ YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0);
+ YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0);
+ YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0);
+ YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0);
+ YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0);
+ static av_always_inline void
+ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                           const int16_t **lumSrc, int lumFilterSize,
+                           const int16_t *chrFilter, const int16_t **chrUSrc,
+                           const int16_t **chrVSrc, int chrFilterSize,
+                           const int16_t **alpSrc, uint8_t *dest,
+                           int dstW, int y, enum PixelFormat target, int hasAlpha)
+ {
+     int i;
+     int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
+     for (i = 0; i < dstW; i++) {
+         int j;
 -        int Y = 0;
 -        int U = -128 << 19;
 -        int V = -128 << 19;
++        int Y = 1<<9;
++        int U = (1<<9)-(128 << 19);
++        int V = (1<<9)-(128 << 19);
+         int av_unused A;
+         int R, G, B;
+         for (j = 0; j < lumFilterSize; j++) {
+             Y += lumSrc[j][i] * lumFilter[j];
+         }
+         for (j = 0; j < chrFilterSize; j++) {
+             U += chrUSrc[j][i] * chrFilter[j];
+             V += chrVSrc[j][i] * chrFilter[j];
+         }
+         Y >>= 10;
+         U >>= 10;
+         V >>= 10;
+         if (hasAlpha) {
 -            A = 1 << 21;
++            A = 1 << 18;
+             for (j = 0; j < lumFilterSize; j++) {
+                 A += alpSrc[j][i] * lumFilter[j];
+             }
+             A >>= 19;
+             if (A & 0x100)
+                 A = av_clip_uint8(A);
+         }
+         Y -= c->yuv2rgb_y_offset;
+         Y *= c->yuv2rgb_y_coeff;
+         Y += 1 << 21;
+         R = Y + V*c->yuv2rgb_v2r_coeff;
+         G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
+         B = Y +                          U*c->yuv2rgb_u2b_coeff;
+         if ((R | G | B) & 0xC0000000) {
+             R = av_clip_uintp2(R, 30);
+             G = av_clip_uintp2(G, 30);
+             B = av_clip_uintp2(B, 30);
+         }
+         switch(target) {
+         case PIX_FMT_ARGB:
+             dest[0] = hasAlpha ? A : 255;
+             dest[1] = R >> 22;
+             dest[2] = G >> 22;
+             dest[3] = B >> 22;
+             break;
+         case PIX_FMT_RGB24:
+             dest[0] = R >> 22;
+             dest[1] = G >> 22;
+             dest[2] = B >> 22;
+             break;
+         case PIX_FMT_RGBA:
+             dest[0] = R >> 22;
+             dest[1] = G >> 22;
+             dest[2] = B >> 22;
+             dest[3] = hasAlpha ? A : 255;
+             break;
+         case PIX_FMT_ABGR:
+             dest[0] = hasAlpha ? A : 255;
+             dest[1] = B >> 22;
+             dest[2] = G >> 22;
+             dest[3] = R >> 22;
 -            dest += 4;
+             break;
+         case PIX_FMT_BGR24:
+             dest[0] = B >> 22;
+             dest[1] = G >> 22;
+             dest[2] = R >> 22;
+             break;
+         case PIX_FMT_BGRA:
+             dest[0] = B >> 22;
+             dest[1] = G >> 22;
+             dest[2] = R >> 22;
+             dest[3] = hasAlpha ? A : 255;
+             break;
+         }
+         dest += step;
      }
  }
  
@@@ -2113,8 -1952,71 +2109,74 @@@ find_c_packed_planar_out_funcs(SwsConte
          *yuv2yuvX     = yuv2yuvX_c;
      }
      if(c->flags & SWS_FULL_CHR_H_INT) {
-         *yuv2packedX = yuv2rgbX_c_full;
+         switch (dstFormat) {
+             case PIX_FMT_RGBA:
+ #if CONFIG_SMALL
+                 *yuv2packedX = yuv2rgba32_full_X_c;
+ #else
+ #if CONFIG_SWSCALE_ALPHA
+                 if (c->alpPixBuf) {
+                     *yuv2packedX = yuv2rgba32_full_X_c;
+                 } else
+ #endif /* CONFIG_SWSCALE_ALPHA */
+                 {
+                     *yuv2packedX = yuv2rgbx32_full_X_c;
+                 }
+ #endif /* !CONFIG_SMALL */
+                 break;
+             case PIX_FMT_ARGB:
+ #if CONFIG_SMALL
+                 *yuv2packedX = yuv2argb32_full_X_c;
+ #else
+ #if CONFIG_SWSCALE_ALPHA
+                 if (c->alpPixBuf) {
+                     *yuv2packedX = yuv2argb32_full_X_c;
+                 } else
+ #endif /* CONFIG_SWSCALE_ALPHA */
+                 {
+                     *yuv2packedX = yuv2xrgb32_full_X_c;
+                 }
+ #endif /* !CONFIG_SMALL */
+                 break;
+             case PIX_FMT_BGRA:
+ #if CONFIG_SMALL
+                 *yuv2packedX = yuv2bgra32_full_X_c;
+ #else
+ #if CONFIG_SWSCALE_ALPHA
+                 if (c->alpPixBuf) {
+                     *yuv2packedX = yuv2bgra32_full_X_c;
+                 } else
+ #endif /* CONFIG_SWSCALE_ALPHA */
+                 {
+                     *yuv2packedX = yuv2bgrx32_full_X_c;
+                 }
+ #endif /* !CONFIG_SMALL */
+                 break;
+             case PIX_FMT_ABGR:
+ #if CONFIG_SMALL
+                 *yuv2packedX = yuv2abgr32_full_X_c;
+ #else
+ #if CONFIG_SWSCALE_ALPHA
+                 if (c->alpPixBuf) {
+                     *yuv2packedX = yuv2abgr32_full_X_c;
+                 } else
+ #endif /* CONFIG_SWSCALE_ALPHA */
+                 {
+                     *yuv2packedX = yuv2xbgr32_full_X_c;
+                 }
+ #endif /* !CONFIG_SMALL */
+                 break;
+             case PIX_FMT_RGB24:
+             *yuv2packedX = yuv2rgb24_full_X_c;
+             break;
+         case PIX_FMT_BGR24:
+             *yuv2packedX = yuv2bgr24_full_X_c;
+             break;
+         }
++        if(!*yuv2packedX)
++            goto YUV_PACKED;
      } else {
++        YUV_PACKED:
          switch (dstFormat) {
          case PIX_FMT_GRAY16BE:
              *yuv2packed1 = yuv2gray16BE_1_c;
@@@ -2276,13 -2266,13 +2427,15 @@@ static int swScale(SwsContext *c, cons
      lastDstY= dstY;
  
      for (;dstY < dstH; dstY++) {
-         unsigned char *dest =dst[0]+dstStride[0]*dstY;
          const int chrDstY= dstY>>c->chrDstVSubSample;
-         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
-         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
-         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
+         uint8_t *dest[4] = {
+             dst[0] + dstStride[0] * dstY,
+             dst[1] + dstStride[1] * chrDstY,
+             dst[2] + dstStride[2] * chrDstY,
+             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
+         };
 +        const uint8_t *lumDither= should_dither ? dithers[7][dstY   &7] : flat64;
 +        const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
  
          const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
          const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
              const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
              const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
              const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
 +
              if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                  const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
+                 if ((dstY&chrSkipMask) || isGray(dstFormat))
+                     dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                  if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
-                     const int16_t *lumBuf = lumSrcPtr[0];
-                     const int16_t *chrUBuf= chrUSrcPtr[0];
-                     const int16_t *chrVBuf= chrVSrcPtr[0];
                      const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                     yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
-                                 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
+                     yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
 -                             dest, dstW, chrDstW);
++                             dest, dstW, chrDstW, lumDither, chrDither);
                  } else { //General YV12
-                     yuv2yuvX(c,
-                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                 chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
+                     yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
+                              lumSrcPtr, vLumFilterSize,
+                              vChrFilter + chrDstY * vChrFilterSize,
+                              chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
 -                             alpSrcPtr, dest, dstW, chrDstW);
++                             alpSrcPtr, dest, dstW, chrDstW, lumDither, chrDither);
                  }
              } else {
                  assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
@@@ -62,33 -73,108 +75,109 @@@ typedef int (*SwsFunc)(struct SwsContex
  typedef void (*yuv2planar1_fn) (struct SwsContext *c,
                                  const int16_t *lumSrc, const int16_t *chrUSrc,
                                  const int16_t *chrVSrc, const int16_t *alpSrc,
-                                 uint8_t *dest,
-                                 uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                                 int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
- typedef void (*yuv2planarX_fn) (struct SwsContext *c,
-                                 const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
 -                                uint8_t *dest[4], int dstW, int chrDstW);
++                                uint8_t *dest[4], int dstW, int chrDstW,
++                                const uint8_t *lumDither, const uint8_t *chrDither);
+ /**
+  * Write one line of horizontally scaled Y/U/V/A to planar output
+  * with multi-point vertical scaling between input pixels.
+  *
+  * @param c             SWS scaling context
+  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+  * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+  * @param lumFilterSize number of vertical luma/alpha input lines to scale
+  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+  * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+  * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+  * @param chrFilterSize number of vertical chroma input lines to scale
+  * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+  * @param dest          pointer to the 4 output planes (Y/U/V/A)
+  * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
+  * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
+  */
+ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
                                  const int16_t *chrFilter, const int16_t **chrUSrc,
-                                 const int16_t **chrVSrc, int chrFilterSize,
-                                 const int16_t **alpSrc,
-                                 uint8_t *dest,
-                                 uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                 const int16_t **chrVSrc,  int chrFilterSize,
+                                 const int16_t **alpSrc, uint8_t *dest[4],
 -                                int dstW, int chrDstW);
 +                                int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
- typedef void (*yuv2packed1_fn) (struct SwsContext *c,
-                                 const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0,
-                                 uint8_t *dest,
-                                 int dstW, int uvalpha, int dstFormat, int flags, int y);
- typedef void (*yuv2packed2_fn) (struct SwsContext *c,
-                                 const uint16_t *buf0, const uint16_t *buf1,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, const uint16_t *abuf1,
-                                 uint8_t *dest,
+ /**
+  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+  * output without any additional vertical scaling (or point-scaling). Note
+  * that this function may do chroma scaling, see the "uvalpha" argument.
+  *
+  * @param c       SWS scaling context
+  * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+  * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+  * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+  * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+  * @param dest    pointer to the output plane
+  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+  *                to write into dest[]
+  * @param uvalpha chroma scaling coefficient for the second line of chroma
+  *                pixels, either 2048 or 0. If 0, one chroma input is used
+  *                for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
+  *                is set, it generates 1 output pixel). If 2048, two chroma
+  *                input pixels should be averaged for 2 output pixels (this
+  *                only happens if SWS_FLAG_FULL_CHR_INT is not set)
+  * @param y       vertical line number for this output. This does not need
+  *                to be used to calculate the offset in the destination,
+  *                but can be used to generate comfort noise using dithering
+  *                for some output formats.
+  */
+ typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
+                                 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                 const int16_t *alpSrc,  uint8_t *dest,
+                                 int dstW, int uvalpha, int y);
+ /**
+  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+  * output by doing bilinear scaling between two input lines.
+  *
+  * @param c       SWS scaling context
+  * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+  * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+  * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+  * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+  * @param dest    pointer to the output plane
+  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+  *                to write into dest[]
+  * @param yalpha  luma/alpha scaling coefficients for the second input line.
+  *                The first line's coefficients can be calculated by using
+  *                4096 - yalpha
+  * @param uvalpha chroma scaling coefficient for the second input line. The
+  *                first line's coefficients can be calculated by using
+  *                4096 - uvalpha
+  * @param y       vertical line number for this output. This does not need
+  *                to be used to calculate the offset in the destination,
+  *                but can be used to generate comfort noise using dithering
+  *                for some output formats.
+  */
+ typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
+                                 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                 const int16_t *alpSrc[2], uint8_t *dest,
                                  int dstW, int yalpha, int uvalpha, int y);
- typedef void (*yuv2packedX_fn) (struct SwsContext *c,
-                                 const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+ /**
+  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+  * output by doing multi-point vertical scaling between input pixels.
+  *
+  * @param c             SWS scaling context
+  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+  * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+  * @param lumFilterSize number of vertical luma/alpha input lines to scale
+  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+  * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+  * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+  * @param chrFilterSize number of vertical chroma input lines to scale
+  * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+  * @param dest          pointer to the output plane
+  * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
+  *                      to write into dest[]
+  * @param y             vertical line number for this output. This does not need
+  *                      to be used to calculate the offset in the destination,
+  *                      but can be used to generate comfort noise using dithering
+  *                      for some output formats.
+  */
+ typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
                                  const int16_t *chrFilter, const int16_t **chrUSrc,
                                  const int16_t **chrVSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint8_t *dest,
Simple merge
@@@ -75,19 -75,16 +75,21 @@@ static void RENAME(yuv2yuvX)(SwsContex
                               const int16_t *chrFilter, const int16_t **chrUSrc,
                               const int16_t **chrVSrc,
                               int chrFilterSize, const int16_t **alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW,
 -                             uint8_t *dest[4], int dstW, int chrDstW)
++                             uint8_t *dest[4], int dstW, int chrDstW,
 +                             const uint8_t *lumDither, const uint8_t *chrDither)
  {
 +    int i;
+     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
      if (uDest) {
          x86_reg uv_off = c->uv_off;
 +        for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
          YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
 +        for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4;
          YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
      }
 +    for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4;
      if (CONFIG_SWSCALE_ALPHA && aDest) {
          YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
      }
@@@ -160,19 -158,16 +162,21 @@@ static void RENAME(yuv2yuvX_ar)(SwsCont
                                  const int16_t *chrFilter, const int16_t **chrUSrc,
                                  const int16_t **chrVSrc,
                                  int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                 uint8_t *aDest, int dstW, int chrDstW,
 -                                uint8_t *dest[4], int dstW, int chrDstW)
++                                uint8_t *dest[4], int dstW, int chrDstW,
 +                                const uint8_t *lumDither, const uint8_t *chrDither)
  {
 +    int i;
+     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
      if (uDest) {
          x86_reg uv_off = c->uv_off;
 +        for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
          YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
 +        for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12;
          YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
      }
 +    for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12;
      if (CONFIG_SWSCALE_ALPHA && aDest) {
          YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
      }
  static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                               const int16_t *chrUSrc, const int16_t *chrVSrc,
                               const int16_t *alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW,
 -                             uint8_t *dst[4], int dstW, int chrDstW)
++                             uint8_t *dst[4], int dstW, int chrDstW,
 +                             const uint8_t *lumDither, const uint8_t *chrDither)
  {
      int p= 4;
-     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+     const int16_t *src[4]= {
+         lumSrc + dstW,     chrUSrc + chrDstW,
+         chrVSrc + chrDstW, alpSrc + dstW
+     };
+     x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
  
      while (p--) {
          if (dst[p]) {
  static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                  const int16_t *chrUSrc, const int16_t *chrVSrc,
                                  const int16_t *alpSrc,
-                                 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                 uint8_t *aDest, int dstW, int chrDstW,
 -                                uint8_t *dst[4], int dstW, int chrDstW)
++                                uint8_t *dst[4], int dstW, int chrDstW,
 +                                const uint8_t *lumDither, const uint8_t *chrDither)
  {
      int p= 4;
-     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-     uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-     x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+     const int16_t *src[4]= {
+         lumSrc + dstW,     chrUSrc + chrDstW,
+         chrVSrc + chrDstW, alpSrc + dstW
+     };
+     x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
  
      while (p--) {
          if (dst[p]) {
diff --cc subdir.mak
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -1,28 -1,32 +1,32 @@@
 -abgr                d894cb97f6c80eb21bdbe8a4eea62d86
 -argb                54346f2b2eef10919e0f247241df3b24
 -bgr24               570f8d6b51a838aed022ef67535f6bdc
 -bgr48be             fcc0f2dbf45d325f84f816c74cbeeebe
 -bgr48le             3f9c2b23eed3b8d196d1c14b38ce50f5
 +abgr                cff82561a074874027ac1cc896fd2730
 +argb                756dd1eaa5baca2238ce23dbdc452684
 +bgr24               e44192347a45586c6c157e3059610cd1
 +bgr48be             6d01b6ccd2ccf18c12985bcb2fde2218
 +bgr48le             4caa6914091ad03b8f67c02d6b050bc0
  bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
 -bgr555be            de8901c1358834fddea060fcb3a67beb
 -bgr555le            36b745067197f9ca8c1731cac51329c9
 -bgr565be            922a2503767036ae9536f4f7823c04ee
 -bgr565le            3a514a298c6161a071ddf9963c06509d
++bgr555be            6a2d335856db12e3ea72173d71610e21
 +bgr555le            41e3e0961478dc634bf68a7bbd670cc9
++bgr565be            21077a3744c889b97032414b11232933
 +bgr565le            614897eaeb422bd9a972f8ee51909be5
  bgr8                7f007fa6c153a16e808a9c51605a4016
 -bgra                a5e7040f9a80cccd65e5acf2ca09ace5
 +bgra                01cfdda1f72fcabb6c46424e27f8c519
  gray                d7786a7d9d99ac74230cc045cab5632c
 -gray16be            af39ce3a497f6734b157c8b94544f537
 -gray16le            7ac1b788bcc472010df7a97e762485e0
 -monob               88c4c050758e64d120f50c7eff694381
 -monow               d31772ebaa877fc2a78565937f7f9673
 +gray16be            5ba22d4802b40ec27e62abb22ad1d1cc
 +gray16le            2d5e83aa875a4c3baa6fecf55e3223bf
 +monob               cb62f31b701c6e987b574974d1b31e32
 +monow               fd5d417ab7728acddffc06870661df61
  nv12                4676d59db43d657dc12841f6bc3ab452
  nv21                69c699510ff1fb777b118ebee1002f14
 -rgb24               514692e28e8ff6860e415ce4fcf6eb8c
 -rgb48be             1894cd30dabcd3180518e4d5f09f25e7
 -rgb48le             1354e6e27ce3c1d4d4989ee56030c94b
 +rgb24               13ff53ebeab74dc05492836f1cfbd2c1
 +rgb48be             f82e99f13d5ede2a53cf3bf7178ca350
 +rgb48le             3a09d89e4b27ea1a98f762e662e306a7
  rgb4_byte           d81ffd3add95842a618eec81024f0b5c
 -rgb555be            4607309f9f217d51cbb53d13b84b4537
 -rgb555le            a350ef1dc2c9688ed49e7ba018843795
 -rgb565be            678ce231c4ea13629c1353b1df4ffbef
 -rgb565le            6f4bb711238baa762d73305213f8d035
++rgb555be            491dc49ff83258ffe415289bdcfb50b2
 +rgb555le            bd698d86c03170c4a16607c0fd1f750f
++rgb565be            35682c17c85f307147041f23ac8092aa
 +rgb565le            bfa0c639d80c3c03fd0c9e5f34296a5e
  rgb8                091d0170b354ef0e97312b95feb5483f
 -rgba                a3d362f222098a00e63867f612018659
 +rgba                16873e3ac914e76116629a5ff8940ac4
  uyvy422             314bd486277111a95d9369b944fa0400
  yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
  yuv411p             1143e7c5cc28fe0922b051b17733bc4c
Simple merge