};
#endif
+DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
+{
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+},{
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+},{
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+},{
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+},{
+ { 9, 17, 15, 23, 8, 16, 14, 22,},
+ { 25, 1, 31, 7, 24, 0, 30, 6,},
+ { 13, 21, 11, 19, 12, 20, 10, 18,},
+ { 29, 5, 27, 3, 28, 4, 26, 2,},
+ { 8, 16, 14, 22, 9, 17, 15, 23,},
+ { 24, 0, 30, 6, 25, 1, 31, 7,},
+ { 12, 20, 10, 18, 13, 21, 11, 19,},
+ { 28, 4, 26, 2, 29, 5, 27, 3,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 36, 68, 60, 92, 34, 66, 58, 90,},
+ { 100, 4,124, 28, 98, 2,122, 26,},
+ { 52, 84, 44, 76, 50, 82, 42, 74,},
+ { 116, 20,108, 12,114, 18,106, 10,},
+ { 32, 64, 56, 88, 38, 70, 62, 94,},
+ { 96, 0,120, 24,102, 6,126, 30,},
+ { 48, 80, 40, 72, 54, 86, 46, 78,},
+ { 112, 16,104, 8,118, 22,110, 14,},
+}};
+
+static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
+
+const uint16_t dither_scale[15][16]={
+{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
+{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
+{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
+{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
+{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
+{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
+{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
+{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
+{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
+{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
+{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
+{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
+{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
+{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
+{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
+};
+
static av_always_inline void
- yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
+ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
int lumFilterSize, const int16_t *chrFilter,
- const int16_t **chrUSrc, const int16_t **chrVSrc,
- int chrFilterSize, const int16_t **alpSrc,
+ const int32_t **chrUSrc, const int32_t **chrVSrc,
+ int chrFilterSize, const int32_t **alpSrc,
uint16_t *dest[4], int dstW, int chrDstW,
int big_endian, int output_bits)
{
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
++
dest[i * 6 + 0] = r_b[Y1];
dest[i * 6 + 1] = g[Y1];
dest[i * 6 + 2] = b_r[Y1];
}
}
- unsigned int val = 0;
+ static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
+ const int16_t *filter,
+ const int16_t *filterPos, int filterSize)
+ {
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ const uint16_t *src = (const uint16_t *) _src;
+
+ for (i = 0; i < dstW; i++) {
+ int j;
+ int srcPos = filterPos[i];
++ int val = 0;
+
+ for (j = 0; j < filterSize; j++) {
+ val += src[srcPos + j] * filter[filterSize * i + j];
+ }
+ // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
+ dst[i] = FFMIN(val >> 11, (1 << 19) - 1);
+ }
+ }
+
// bilinear / bicubic scaling
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
const int16_t *filter, const int16_t *filterPos,
}
}
- static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
++static inline void hScale16N_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+ const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{
+ int i, j;
+
+ for (i=0; i<dstW; i++) {
+ int srcPos= filterPos[i];
+ int val=0;
+ for (j=0; j<filterSize; j++) {
+ val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+ }
+ dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
+ }
+}
+
- static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
++static inline void hScale16NX_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
+ const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
+{
+ int i, j;
+ for (i=0; i<dstW; i++) {
+ int srcPos= filterPos[i];
+ int val=0;
+ for (j=0; j<filterSize; j++) {
+ val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
+ }
+ dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
+ }
+}
+
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
dst[i] = (dst[i]*14071 + 33561947)>>14;
}
- dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
+ static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
+ {
+ int i;
+ int32_t *dstU = (int32_t *) _dstU;
+ int32_t *dstV = (int32_t *) _dstV;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
+ dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
+ }
+ }
+ static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
+ {
+ int i;
+ int32_t *dstU = (int32_t *) _dstU;
+ int32_t *dstV = (int32_t *) _dstV;
+ for (i = 0; i < width; i++) {
+ dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
+ dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
+ }
+ }
+ static void lumRangeToJpeg16_c(int16_t *_dst, int width)
+ {
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < width; i++)
+ dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
+ }
+ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
+ {
+ int i;
+ int32_t *dst = (int32_t *) _dst;
+ for (i = 0; i < width; i++)
++ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
+ }
+
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc)
{
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
xpos+=xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
}
+ static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
+ {
+ int i;
+ uint8_t *dst = (uint8_t *) _dst;
+ for (i = len - 1; i >= 0; i--) {
+ dst[i * 2] = dst[i * 2 + 1] = src[i];
+ }
+ }
+
+ static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
+ {
+ int i;
+ for (i = 0; i < len; i++) {
+ dst[i] = src[i] >> 4;
+ }
+ }
+
// *** horizontal scale Y line to temp buffer
- static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
+ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc,
const int16_t *hLumFilter,
const int16_t *hLumFilterPos, int hLumFilterSize,
src= formatConvBuffer;
}
- if (!c->hyscale_fast) {
+ if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
+ c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
+ src = formatConvBuffer;
+ }
+
+ if (c->hScale16) {
+ int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
+ } else if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
xpos+=xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
}
- static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
+ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc, const int16_t *hChrFilter,
const int16_t *hChrFilterPos, int hChrFilterSize,
src2= buf2;
}
- uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
+ if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
- if (!c->hcscale_fast) {
++ uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2+78, 16));
+ c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
+ c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
+ src1 = formatConvBuffer;
+ src2 = buf2;
+ }
+
+ if (c->hScale16) {
+ int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+ c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
+ } else if (!c->hcscale_fast) {
c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
} else { // fast bilinear upscale / crap downscale
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
- case PIX_FMT_GRAY16BE :
- case PIX_FMT_YUV444P9LE:
- case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
- case PIX_FMT_YUV422P10LE:
- case PIX_FMT_YUV444P10LE:
- case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
case PIX_FMT_YUV444P9BE:
- case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
+ case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
- case PIX_FMT_YUV420P10BE:
- case PIX_FMT_YUV420P16BE:
- case PIX_FMT_YUV422P16BE:
- case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
- case PIX_FMT_GRAY16LE :
- case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
++ case PIX_FMT_YUV420P10BE: c->hScale16= HAVE_BIGENDIAN ? hScale16N_c : hScale16NX_c; break;
+ case PIX_FMT_YUV444P9LE:
+ case PIX_FMT_YUV420P9LE:
+ case PIX_FMT_YUV422P10LE:
+ case PIX_FMT_YUV420P10LE:
- case PIX_FMT_YUV444P10LE:
++ case PIX_FMT_YUV444P10LE: c->hScale16= HAVE_BIGENDIAN ? hScale16NX_c : hScale16N_c; break;
+ #if HAVE_BIGENDIAN
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
- case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
+ case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
+ #else
+ case PIX_FMT_YUV420P16BE:
+ case PIX_FMT_YUV422P16BE:
+ case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
+ #endif
}
if (c->chrSrcHSubSample) {
switch(srcFormat) {
c->lumToYV12 = NULL;
c->alpToYV12 = NULL;
switch (srcFormat) {
- case PIX_FMT_YUV444P9LE:
- case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
- case PIX_FMT_YUV444P10LE:
- case PIX_FMT_YUV422P10LE:
- case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
- case PIX_FMT_YUV444P9BE:
- case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
- case PIX_FMT_YUV444P10BE:
- case PIX_FMT_YUV422P10BE:
- case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
+ #if HAVE_BIGENDIAN
+ case PIX_FMT_YUV420P16LE:
+ case PIX_FMT_YUV422P16LE:
+ case PIX_FMT_YUV444P16LE:
+ case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
+ #else
+ case PIX_FMT_YUV420P16BE:
+ case PIX_FMT_YUV422P16BE:
+ case PIX_FMT_YUV444P16BE:
+ case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
+ #endif
case PIX_FMT_YUYV422 :
- case PIX_FMT_GRAY8A :
- c->lumToYV12 = yuy2ToY_c; break;
- case PIX_FMT_UYVY422 :
- c->lumToYV12 = uyvyToY_c; break;
+ case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
+ case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
}
}
- if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
- c->hScale16= hScale16_c;
++ if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
++ || c->srcFormat == PIX_FMT_PAL8)
++ c->hScale16= hScale16N_c;
++
+ if (c->scalingBpp == 8) {
+ c->hScale = hScale_c;
+ if (c->flags & SWS_FAST_BILINEAR) {
+ c->hyscale_fast = hyscale_fast_c;
+ c->hcscale_fast = hcscale_fast_c;
+ }
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
if (c->srcRange) {