2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most have been tested, but not all of the tests were written down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
// RGB -> YUV conversion coefficients in RGB2YUV_SHIFT-bit fixed point,
// ITU-R BT.601 limited range: luma terms scaled by 219/255, chroma terms
// by 224/255, rounded to nearest.  First letter = input channel (R/G/B),
// second letter = output component (Y/U/V).
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
// Per-colourspace RGB->YUV coefficient rows.  Column order is
// {GY, BY, RY,  GU, BU, RU,  GV, BV, RV} (the ITU601 row matches the
// BY/BV/... macros above).  Rows are duplicated where several colourspace
// ids map to the same matrix.  NOTE(review): row index is presumably the
// SWS_CS_* colourspace identifier — confirm against the full file.
83 static const double rgb2yuv_table[8][9]={
84 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
85 {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
86 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
87 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
88 {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
89 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
90 {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
91 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
96 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
99 more intelligent misalignment avoidance for the horizontal scaler
100 write special vertical cubic upscale version
101 optimize C code (YV12 / minmax)
102 add support for packed pixel YUV input & output
103 add support for Y8 output
104 optimize BGR24 & BGR32
105 add BGR4 output support
106 write special BGR->BGR scaler
// 2x2 ordered-dither matrix (values 0..3) replicated across 8 columns;
// 8-byte aligned so a whole row can be loaded at once.
109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
110 {  1,   3,   1,   3,   1,   3,   1,   3, },
111 {  2,   0,   2,   0,   2,   0,   2,   0, },
// 2x2 ordered-dither matrix scaled to amplitude ~8, replicated across
// 8 columns.
114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
115 {  6,   2,   6,   2,   6,   2,   6,   2, },
116 {  0,   4,   0,   4,   0,   4,   0,   4, },
// 4x4 ordered-dither matrix (values 0..15) replicated across 8 columns.
// Non-static: also referenced from outside this translation unit.
119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
120 {  8,   4,  11,   7,   8,   4,  11,   7, },
121 {  2,  14,   1,  13,   2,  14,   1,  13, },
122 { 10,   6,   9,   5,  10,   6,   9,   5, },
123 {  0,  12,   3,  15,   0,  12,   3,  15, },
// 8x8 ordered-dither matrix with values 0..31.
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
127 { 17,   9,  23,  15,  16,   8,  22,  14, },
128 {  5,  29,   3,  27,   4,  28,   2,  26, },
129 { 21,  13,  19,  11,  20,  12,  18,  10, },
130 {  0,  24,   6,  30,   1,  25,   7,  31, },
131 { 16,   8,  22,  14,  17,   9,  23,  15, },
132 {  4,  28,   2,  26,   5,  29,   3,  27, },
133 { 20,  12,  18,  10,  21,  13,  19,  11, },
134 {  1,  25,   7,  31,   0,  24,   6,  30, },
// 8x8 ordered-dither matrix with values 0..72 (amplitude ~73).
137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
138 {  0,  55,  14,  68,   3,  58,  17,  72, },
139 { 37,  18,  50,  32,  40,  22,  54,  35, },
140 {  9,  64,   5,  59,  13,  67,   8,  63, },
141 { 46,  27,  41,  23,  49,  31,  44,  26, },
142 {  2,  57,  16,  71,   1,  56,  15,  70, },
143 { 39,  21,  52,  34,  38,  19,  51,  33, },
144 { 11,  66,   7,  62,  10,  65,   6,  60, },
145 { 48,  30,  43,  25,  47,  29,  42,  24, },
// 8x8 dither matrix with amplitude ~220; rows are indexed by (y & 7) in
// the yuv2mono_* output functions below.
149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
150 {117,  62, 158, 103, 113,  58, 155, 100, },
151 { 34, 199,  21, 186,  31, 196,  17, 182, },
152 {144,  89, 131,  76, 141,  86, 127,  72, },
153 {  0, 165,  41, 206,  10, 175,  52, 217, },
154 {110,  55, 151,  96, 120,  65, 162, 107, },
155 { 28, 193,  14, 179,  38, 203,  24, 189, },
156 {138,  83, 124,  69, 148,  93, 134,  79, },
157 {  7, 172,  48, 213,   3, 168,  45, 210, },
160 // tries to correct a gamma of 1.5
// NOTE(review): same identifier as the dither_8x8_220 table above — in
// the full file these alternative gamma-corrected variants are presumably
// selected by preprocessor conditionals that are not visible in this
// chunk; confirm before editing.
161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
162 {  0, 143,  18, 200,   2, 156,  25, 215, },
163 { 78,  28, 125,  64,  89,  36, 138,  74, },
164 { 10, 180,   3, 161,  16, 195,   8, 175, },
165 {109,  51,  93,  38, 121,  60, 105,  47, },
166 {  1, 152,  23, 210,   0, 147,  20, 205, },
167 { 85,  33, 134,  71,  81,  30, 130,  67, },
168 { 14, 190,   6, 171,  12, 185,   5, 166, },
169 {117,  57, 101,  44, 113,  54,  97,  41, },
172 // tries to correct a gamma of 2.0
// NOTE(review): duplicate identifier — see the note on the gamma-1.5
// variant; selection is presumably via preprocessor conditionals missing
// from this chunk.
173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
174 {  0, 124,   8, 193,   0, 140,  12, 213, },
175 { 55,  14, 104,  42,  66,  19, 119,  52, },
176 {  3, 168,   1, 145,   6, 187,   3, 162, },
177 { 86,  31,  70,  21,  99,  39,  82,  28, },
178 {  0, 134,  11, 206,   0, 129,   9, 200, },
179 { 62,  17, 114,  48,  58,  16, 109,  45, },
180 {  5, 181,   2, 157,   4, 175,   1, 151, },
181 { 95,  36,  78,  26,  90,  34,  74,  24, },
184 // tries to correct a gamma of 2.5
// NOTE(review): duplicate identifier — see the note on the gamma-1.5
// variant; selection is presumably via preprocessor conditionals missing
// from this chunk.
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
186 {  0, 107,   3, 187,   0, 125,   6, 212, },
187 { 39,   7,  86,  28,  49,  11, 102,  36, },
188 {  1, 158,   0, 131,   3, 180,   1, 151, },
189 { 68,  19,  52,  12,  81,  25,  64,  17, },
190 {  0, 119,   5, 203,   0, 113,   4, 195, },
191 { 45,   9,  96,  33,  42,   8,  91,  30, },
192 {  2, 172,   1, 144,   2, 165,   0, 137, },
193 { 77,  23,  60,  15,  72,  21,  56,  14, },
// Family of eight 8x8 dither matrices of increasing amplitude
// (roughly 2, 4, 8, 16, 32, 64, 64, 128).  NOTE(review): presumably
// indexed by the number of bits being dithered away — confirm against
// the code that reads this table (not visible in this chunk).
197 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
199 {  0,  1,  0,  1,  0,  1,  0,  1,},
200 {  1,  0,  1,  0,  1,  0,  1,  0,},
201 {  0,  1,  0,  1,  0,  1,  0,  1,},
202 {  1,  0,  1,  0,  1,  0,  1,  0,},
203 {  0,  1,  0,  1,  0,  1,  0,  1,},
204 {  1,  0,  1,  0,  1,  0,  1,  0,},
205 {  0,  1,  0,  1,  0,  1,  0,  1,},
206 {  1,  0,  1,  0,  1,  0,  1,  0,},
208 {  1,  2,  1,  2,  1,  2,  1,  2,},
209 {  3,  0,  3,  0,  3,  0,  3,  0,},
210 {  1,  2,  1,  2,  1,  2,  1,  2,},
211 {  3,  0,  3,  0,  3,  0,  3,  0,},
212 {  1,  2,  1,  2,  1,  2,  1,  2,},
213 {  3,  0,  3,  0,  3,  0,  3,  0,},
214 {  1,  2,  1,  2,  1,  2,  1,  2,},
215 {  3,  0,  3,  0,  3,  0,  3,  0,},
217 {  2,  4,  3,  5,  2,  4,  3,  5,},
218 {  6,  0,  7,  1,  6,  0,  7,  1,},
219 {  3,  5,  2,  4,  3,  5,  2,  4,},
220 {  7,  1,  6,  0,  7,  1,  6,  0,},
221 {  2,  4,  3,  5,  2,  4,  3,  5,},
222 {  6,  0,  7,  1,  6,  0,  7,  1,},
223 {  3,  5,  2,  4,  3,  5,  2,  4,},
224 {  7,  1,  6,  0,  7,  1,  6,  0,},
226 {  4,  8,  7, 11,  4,  8,  7, 11,},
227 { 12,  0, 15,  3, 12,  0, 15,  3,},
228 {  6, 10,  5,  9,  6, 10,  5,  9,},
229 { 14,  2, 13,  1, 14,  2, 13,  1,},
230 {  4,  8,  7, 11,  4,  8,  7, 11,},
231 { 12,  0, 15,  3, 12,  0, 15,  3,},
232 {  6, 10,  5,  9,  6, 10,  5,  9,},
233 { 14,  2, 13,  1, 14,  2, 13,  1,},
235 {  9, 17, 15, 23,  8, 16, 14, 22,},
236 { 25,  1, 31,  7, 24,  0, 30,  6,},
237 { 13, 21, 11, 19, 12, 20, 10, 18,},
238 { 29,  5, 27,  3, 28,  4, 26,  2,},
239 {  8, 16, 14, 22,  9, 17, 15, 23,},
240 { 24,  0, 30,  6, 25,  1, 31,  7,},
241 { 12, 20, 10, 18, 13, 21, 11, 19,},
242 { 28,  4, 26,  2, 29,  5, 27,  3,},
244 { 18, 34, 30, 46, 17, 33, 29, 45,},
245 { 50,  2, 62, 14, 49,  1, 61, 13,},
246 { 26, 42, 22, 38, 25, 41, 21, 37,},
247 { 58, 10, 54,  6, 57,  9, 53,  5,},
248 { 16, 32, 28, 44, 19, 35, 31, 47,},
249 { 48,  0, 60, 12, 51,  3, 63, 15,},
250 { 24, 40, 20, 36, 27, 43, 23, 39,},
251 { 56,  8, 52,  4, 59, 11, 55,  7,},
253 { 18, 34, 30, 46, 17, 33, 29, 45,},
254 { 50,  2, 62, 14, 49,  1, 61, 13,},
255 { 26, 42, 22, 38, 25, 41, 21, 37,},
256 { 58, 10, 54,  6, 57,  9, 53,  5,},
257 { 16, 32, 28, 44, 19, 35, 31, 47,},
258 { 48,  0, 60, 12, 51,  3, 63, 15,},
259 { 24, 40, 20, 36, 27, 43, 23, 39,},
260 { 56,  8, 52,  4, 59, 11, 55,  7,},
262 { 36, 68, 60, 92, 34, 66, 58, 90,},
263 { 100,  4,124, 28, 98,  2,122, 26,},
264 { 52, 84, 44, 76, 50, 82, 42, 74,},
265 { 116, 20,108, 12,114, 18,106, 10,},
266 { 32, 64, 56, 88, 38, 70, 62, 94,},
267 { 96,  0,120, 24,102,  6,126, 30,},
268 { 48, 80, 40, 72, 54, 86, 46, 78,},
269 { 112, 16,104,  8,118, 22,110, 14,},
// Flat (no-op) dither row: a constant mid-step bias of 64 for all 8 taps.
272 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
// Scale factors for dithering.  NOTE(review): the exact meaning of the
// two indices is not visible in this chunk — presumably source and
// destination bit depths; confirm against the code that reads this table.
274 const uint16_t dither_scale[15][16]={
275 {    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
276 {    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
277 {    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
278 {    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
279 {    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
280 {    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
281 {    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
282 {    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
283 {    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
284 {    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
285 {    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
286 {    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
287 {    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
288 {    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
289 {    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
// Vertical filtering to >8-bit planar YUV(A) output.  For every output
// sample it accumulates lumFilterSize (resp. chrFilterSize) filter taps,
// shifts down by (11 + 16 - output_bits), clips to the output range and
// stores a 16-bit value whose endianness is chosen by `big_endian`.
// `big_endian` and `output_bits` are compile-time template parameters
// (the function is always inlined by the yuv2NBPS wrappers below).
292 static av_always_inline void
293 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
294 int lumFilterSize, const int16_t *chrFilter,
295 const int16_t **chrUSrc, const int16_t **chrVSrc,
296 int chrFilterSize, const int16_t **alpSrc,
297 uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
298 uint16_t *aDest, int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int shift = 11 + 16 - output_bits;
// output_pixel(): clip the accumulated value and store it as 16-bit
// big-endian (AV_WB16) or little-endian (AV_WL16); full 16-bit output
// uses av_clip_uint16, narrower depths clip to `output_bits` bits.
// NOTE(review): some lines of this macro (the if/else glue) are not
// visible in this chunk.
305 #define output_pixel(pos, val) \
307 if (output_bits == 16) { \
308 AV_WB16(pos, av_clip_uint16(val >> shift)); \
310 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
313 if (output_bits == 16) { \
314 AV_WL16(pos, av_clip_uint16(val >> shift)); \
316 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
// Luma plane: filtered sum seeded with a rounding bias.
319 for (i = 0; i < dstW; i++) {
320 int val = 1 << (26-output_bits);
323 for (j = 0; j < lumFilterSize; j++)
324 val += lumSrc[j][i] * lumFilter[j];
326 output_pixel(&dest[i], val);
// Chroma planes (U and V filtered together per pixel).
330 for (i = 0; i < chrDstW; i++) {
331 int u = 1 << (26-output_bits);
332 int v = 1 << (26-output_bits);
335 for (j = 0; j < chrFilterSize; j++) {
336 u += chrUSrc[j][i] * chrFilter[j];
337 v += chrVSrc[j][i] * chrFilter[j];
340 output_pixel(&uDest[i], u);
341 output_pixel(&vDest[i], v);
// Optional alpha plane, filtered with the luma coefficients.
345 if (CONFIG_SWSCALE_ALPHA && aDest) {
346 for (i = 0; i < dstW; i++) {
347 int val = 1 << (26-output_bits);
350 for (j = 0; j < lumFilterSize; j++)
351 val += alpSrc[j][i] * lumFilter[j];
353 output_pixel(&aDest[i], val);
// Generates a bit-depth/endianness-specific wrapper
// (yuv2yuvX<bits><BE|LE>_c) around yuv2yuvX16_c_template: the uint8_t*
// plane pointers are reinterpreted as uint16_t* and the template is
// instantiated with compile-time `is_be`/`bits`.  The invocations of
// this macro are outside this chunk.
359 #define yuv2NBPS(bits, BE_LE, is_be) \
360 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
361 const int16_t **lumSrc, int lumFilterSize, \
362 const int16_t *chrFilter, const int16_t **chrUSrc, \
363 const int16_t **chrVSrc, \
364 int chrFilterSize, const int16_t **alpSrc, \
365 uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
366 uint8_t *_aDest, int dstW, int chrDstW) \
368 uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \
369 *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
370 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
371 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
373 dest, uDest, vDest, aDest, \
374 dstW, chrDstW, is_be, bits); \
// Vertical filtering to 8-bit planar YUV(A).  Each output sample is a
// filtered sum seeded with a per-column dither value (<<12 to match the
// fixed-point scale), shifted down by 19 and clipped to uint8.
383 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
384 const int16_t **lumSrc, int lumFilterSize,
385 const int16_t *chrFilter, const int16_t **chrUSrc,
386 const int16_t **chrVSrc,
387 int chrFilterSize, const int16_t **alpSrc,
388 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
389 uint8_t *aDest, int dstW, int chrDstW,
390 const uint8_t *lumDither, const uint8_t *chrDither)
392 //FIXME Optimize (just quickly written not optimized..)
// Luma plane.
394 for (i=0; i<dstW; i++) {
395 int val = lumDither[i&7] << 12;
397 for (j=0; j<lumFilterSize; j++)
398 val += lumSrc[j][i] * lumFilter[j];
400 dest[i]= av_clip_uint8(val>>19);
// Chroma planes; V uses the dither row offset by 3 columns so U and V
// are not dithered identically.
404 for (i=0; i<chrDstW; i++) {
405 int u = chrDither[i&7] << 12;
406 int v = chrDither[(i+3)&7] << 12;
408 for (j=0; j<chrFilterSize; j++) {
409 u += chrUSrc[j][i] * chrFilter[j];
410 v += chrVSrc[j][i] * chrFilter[j];
413 uDest[i]= av_clip_uint8(u>>19);
414 vDest[i]= av_clip_uint8(v>>19);
// Optional alpha plane, filtered with the luma coefficients.
417 if (CONFIG_SWSCALE_ALPHA && aDest)
418 for (i=0; i<dstW; i++) {
419 int val = lumDither[i&7] << 12;
421 for (j=0; j<lumFilterSize; j++)
422 val += alpSrc[j][i] * lumFilter[j];
424 aDest[i]= av_clip_uint8(val>>19);
// Unfiltered (single-tap) vertical path to 8-bit planar YUV(A): add the
// per-column dither value, shift the 15-bit intermediate down by 7 and
// clip to uint8.
428 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
429 const int16_t *chrUSrc, const int16_t *chrVSrc,
430 const int16_t *alpSrc,
431 uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
432 uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
436 for (i=0; i<dstW; i++) {
437 int val= (lumSrc[i]+lumDither[i&7])>>7;
438 dest[i]= av_clip_uint8(val);
// V dither row is offset by 3 columns, matching yuv2yuvX_c.
442 for (i=0; i<chrDstW; i++) {
443 int u=(chrUSrc[i]+chrDither[i&7])>>7;
444 int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
445 uDest[i]= av_clip_uint8(u);
446 vDest[i]= av_clip_uint8(v);
449 if (CONFIG_SWSCALE_ALPHA && aDest)
450 for (i=0; i<dstW; i++) {
451 int val= (alpSrc[i]+lumDither[i&7])>>7;
452 aDest[i]= av_clip_uint8(val);
// Vertical filtering to semi-planar NV12/NV21: luma as in yuv2yuvX_c,
// chroma written interleaved into uDest — U,V order for NV12, V,U
// otherwise (presumably NV21; the `else` glue is not visible in this
// chunk).
456 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
457 const int16_t **lumSrc, int lumFilterSize,
458 const int16_t *chrFilter, const int16_t **chrUSrc,
459 const int16_t **chrVSrc, int chrFilterSize,
460 const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
461 uint8_t *vDest, uint8_t *aDest,
462 int dstW, int chrDstW,
463 const uint8_t *lumDither, const uint8_t *chrDither)
465 enum PixelFormat dstFormat = c->dstFormat;
467 //FIXME Optimize (just quickly written not optimized..)
// Luma plane: dithered filtered sum, >>19, clipped.
469 for (i=0; i<dstW; i++) {
470 int val = lumDither[i&7]<<12;
472 for (j=0; j<lumFilterSize; j++)
473 val += lumSrc[j][i] * lumFilter[j];
475 dest[i]= av_clip_uint8(val>>19);
// NV12: interleave as U at even, V at odd bytes.
481 if (dstFormat == PIX_FMT_NV12)
482 for (i=0; i<chrDstW; i++) {
483 int u = chrDither[i&7]<<12;
484 int v = chrDither[(i+3)&7]<<12;
486 for (j=0; j<chrFilterSize; j++) {
487 u += chrUSrc[j][i] * chrFilter[j];
488 v += chrVSrc[j][i] * chrFilter[j];
491 uDest[2*i]= av_clip_uint8(u>>19);
492 uDest[2*i+1]= av_clip_uint8(v>>19);
// Otherwise: swapped interleave, V at even, U at odd bytes.
495 for (i=0; i<chrDstW; i++) {
496 int u = chrDither[i&7]<<12;
497 int v = chrDither[(i+3)&7]<<12;
499 for (j=0; j<chrFilterSize; j++) {
500 u += chrUSrc[j][i] * chrFilter[j];
501 v += chrVSrc[j][i] * chrFilter[j];
504 uDest[2*i]= av_clip_uint8(v>>19);
505 uDest[2*i+1]= av_clip_uint8(u>>19);
// output_pixel(): store a 16-bit grey sample, big- or little-endian
// depending on the target format.  NOTE(review): most of this macro's
// body is not visible in this chunk.
509 #define output_pixel(pos, val) \
510 if (target == PIX_FMT_GRAY16BE) { \
// Vertical filtering to 16-bit grey output, two pixels per iteration.
// Clipping is only applied when the accumulated value actually
// overflowed 16 bits (tested via bit 0x10000).
516 static av_always_inline void
517 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
518 const int16_t **lumSrc, int lumFilterSize,
519 const int16_t *chrFilter, const int16_t **chrUSrc,
520 const int16_t **chrVSrc, int chrFilterSize,
521 const int16_t **alpSrc, uint8_t *dest, int dstW,
522 int y, enum PixelFormat target)
526 for (i = 0; i < (dstW >> 1); i++) {
530 const int i2 = 2 * i;
532 for (j = 0; j < lumFilterSize; j++) {
533 Y1 += lumSrc[j][i2]   * lumFilter[j];
534 Y2 += lumSrc[j][i2+1] * lumFilter[j];
538 if ((Y1 | Y2) & 0x10000) {
539 Y1 = av_clip_uint16(Y1);
540 Y2 = av_clip_uint16(Y2);
542 output_pixel(&dest[2 * i2 + 0], Y1);
543 output_pixel(&dest[2 * i2 + 2], Y2);
// Two-buffer (bilinear blend) path to 16-bit grey: each output pixel
// blends buf0/buf1 with the 12-bit weight yalpha, >>11 to reach 16-bit
// range.  Chroma/alpha buffers are accepted but unused for grey output.
547 static av_always_inline void
548 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
549 const uint16_t *buf1, const uint16_t *ubuf0,
550 const uint16_t *ubuf1, const uint16_t *vbuf0,
551 const uint16_t *vbuf1, const uint16_t *abuf0,
552 const uint16_t *abuf1, uint8_t *dest, int dstW,
553 int yalpha, int uvalpha, int y,
554 enum PixelFormat target)
// NOTE(review): the trailing backslash below looks like an extraction
// artifact (this is a function body, not a macro) — confirm.
556 int yalpha1 = 4095 - yalpha; \
559 for (i = 0; i < (dstW >> 1); i++) {
560 const int i2 = 2 * i;
561 int Y1 = (buf0[i2  ] * yalpha1 + buf1[i2  ] * yalpha) >> 11;
562 int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
564 output_pixel(&dest[2 * i2 + 0], Y1);
565 output_pixel(&dest[2 * i2 + 2], Y2);
// Single-buffer (unscaled vertical) path to 16-bit grey: widen the
// 15-bit intermediate to 16 bits with a left shift by 1.
569 static av_always_inline void
570 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
571 const uint16_t *ubuf0, const uint16_t *ubuf1,
572 const uint16_t *vbuf0, const uint16_t *vbuf1,
573 const uint16_t *abuf0, uint8_t *dest, int dstW,
574 int uvalpha, enum PixelFormat dstFormat,
575 int flags, int y, enum PixelFormat target)
579 for (i = 0; i < (dstW >> 1); i++) {
580 const int i2 = 2 * i;
581 int Y1 = buf0[i2  ] << 1;
582 int Y2 = buf0[i2+1] << 1;
584 output_pixel(&dest[2 * i2 + 0], Y1);
585 output_pixel(&dest[2 * i2 + 2], Y2);
// Generates the three public entry points (<name><ext>_X_c, _2_c, _1_c)
// for one packed output format by binding the compile-time `fmt` target
// to the corresponding always-inline *_X/_2/_1_c_template functions.
591 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
592 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
593 const int16_t **lumSrc, int lumFilterSize, \
594 const int16_t *chrFilter, const int16_t **chrUSrc, \
595 const int16_t **chrVSrc, int chrFilterSize, \
596 const int16_t **alpSrc, uint8_t *dest, int dstW, \
599 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
600 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
601 alpSrc, dest, dstW, y, fmt); \
604 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
605 const uint16_t *buf1, const uint16_t *ubuf0, \
606 const uint16_t *ubuf1, const uint16_t *vbuf0, \
607 const uint16_t *vbuf1, const uint16_t *abuf0, \
608 const uint16_t *abuf1, uint8_t *dest, int dstW, \
609 int yalpha, int uvalpha, int y) \
611 name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
612 vbuf0, vbuf1, abuf0, abuf1, \
613 dest, dstW, yalpha, uvalpha, y, fmt); \
616 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
617 const uint16_t *ubuf0, const uint16_t *ubuf1, \
618 const uint16_t *vbuf0, const uint16_t *vbuf1, \
619 const uint16_t *abuf0, uint8_t *dest, int dstW, \
620 int uvalpha, enum PixelFormat dstFormat, \
623 name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
624 vbuf1, abuf0, dest, dstW, uvalpha, \
625 dstFormat, flags, y, fmt); \
// Instantiate the gray16 output functions for both endiannesses.
628 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
629 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// output_pixel(): store one byte of 8 packed 1-bit pixels; MONOBLACK
// and MONOWHITE differ in bit polarity.  NOTE(review): most of this
// macro's body is not visible in this chunk.
631 #define output_pixel(pos, acc) \
632 if (target == PIX_FMT_MONOBLACK) { \
// Vertical filtering to 1-bit-per-pixel monochrome output.  Uses the
// dither_8x8_220 row selected by (y & 7) and the grey-component lookup
// table `g`; bits are accumulated into `acc` via acc += acc + bit.
638 static av_always_inline void
639 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
640 const int16_t **lumSrc, int lumFilterSize,
641 const int16_t *chrFilter, const int16_t **chrUSrc,
642 const int16_t **chrVSrc, int chrFilterSize,
643 const int16_t **alpSrc, uint8_t *dest, int dstW,
644 int y, enum PixelFormat target)
646 const uint8_t * const d128=dither_8x8_220[y&7];
647 uint8_t *g = c->table_gU[128] + c->table_gV[128];
651 for (i = 0; i < dstW - 1; i += 2) {
656 for (j = 0; j < lumFilterSize; j++) {
657 Y1 += lumSrc[j][i]   * lumFilter[j];
658 Y2 += lumSrc[j][i+1] * lumFilter[j];
// Clip only if either value overflowed 8 bits.
662 if ((Y1 | Y2) & 0x100) {
663 Y1 = av_clip_uint8(Y1);
664 Y2 = av_clip_uint8(Y2);
666 acc += acc + g[Y1 + d128[(i + 0) & 7]];
667 acc += acc + g[Y2 + d128[(i + 1) & 7]];
669 output_pixel(*dest++, acc);
// Two-buffer (bilinear blend) path to 1bpp monochrome: blends eight
// luma samples per iteration with the 12-bit weight yalpha, dithers via
// d128 and packs the 8 resulting bits into one output byte.
674 static av_always_inline void
675 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
676 const uint16_t *buf1, const uint16_t *ubuf0,
677 const uint16_t *ubuf1, const uint16_t *vbuf0,
678 const uint16_t *vbuf1, const uint16_t *abuf0,
679 const uint16_t *abuf1, uint8_t *dest, int dstW,
680 int yalpha, int uvalpha, int y,
681 enum PixelFormat target)
683 const uint8_t * const d128 = dither_8x8_220[y & 7];
684 uint8_t *g = c->table_gU[128] + c->table_gV[128];
685 int yalpha1 = 4095 - yalpha;
688 for (i = 0; i < dstW - 7; i += 8) {
689 int acc =   g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
690 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
691 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
692 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
693 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
694 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
695 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
696 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
697 output_pixel(*dest++, acc);
// Single-buffer (unscaled vertical) path to 1bpp monochrome: the 15-bit
// intermediates are reduced with >>7, dithered and packed 8 pixels per
// output byte.
701 static av_always_inline void
702 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
703 const uint16_t *ubuf0, const uint16_t *ubuf1,
704 const uint16_t *vbuf0, const uint16_t *vbuf1,
705 const uint16_t *abuf0, uint8_t *dest, int dstW,
706 int uvalpha, enum PixelFormat dstFormat,
707 int flags, int y, enum PixelFormat target)
709 const uint8_t * const d128 = dither_8x8_220[y & 7];
710 uint8_t *g = c->table_gU[128] + c->table_gV[128];
713 for (i = 0; i < dstW - 7; i += 8) {
714 int acc =   g[(buf0[i    ] >> 7) + d128[0]];
715 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
716 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
717 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
718 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
719 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
720 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
721 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
722 output_pixel(*dest++, acc);
// Instantiate monochrome output for both bit polarities.
728 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
729 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// output_pixels(): write one 4-byte macropixel; YUYV order Y1 U Y2 V,
// otherwise (UYVY) the luma bytes land at odd offsets.  NOTE(review):
// the chroma byte stores of this macro are not visible in this chunk.
731 #define output_pixels(pos, Y1, U, Y2, V) \
732 if (target == PIX_FMT_YUYV422) { \
733 dest[pos + 0] = Y1; \
735 dest[pos + 2] = Y2; \
739 dest[pos + 1] = Y1; \
741 dest[pos + 3] = Y2; \
// Vertical filtering to packed 4:2:2 (YUYV/UYVY): two luma and one
// U/V pair per macropixel; clip only on 8-bit overflow.
744 static av_always_inline void
745 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
746 const int16_t **lumSrc, int lumFilterSize,
747 const int16_t *chrFilter, const int16_t **chrUSrc,
748 const int16_t **chrVSrc, int chrFilterSize,
749 const int16_t **alpSrc, uint8_t *dest, int dstW,
750 int y, enum PixelFormat target)
754 for (i = 0; i < (dstW >> 1); i++) {
761 for (j = 0; j < lumFilterSize; j++) {
762 Y1 += lumSrc[j][i * 2]     * lumFilter[j];
763 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
765 for (j = 0; j < chrFilterSize; j++) {
766 U += chrUSrc[j][i] * chrFilter[j];
767 V += chrVSrc[j][i] * chrFilter[j];
773 if ((Y1 | Y2 | U | V) & 0x100) {
774 Y1 = av_clip_uint8(Y1);
775 Y2 = av_clip_uint8(Y2);
776 U  = av_clip_uint8(U);
777 V  = av_clip_uint8(V);
779 output_pixels(4*i, Y1, U, Y2, V);
// Two-buffer (bilinear blend) path to packed 4:2:2: luma blended with
// yalpha, chroma with uvalpha (both 12-bit weights), >>19 to 8 bits.
783 static av_always_inline void
784 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
785 const uint16_t *buf1, const uint16_t *ubuf0,
786 const uint16_t *ubuf1, const uint16_t *vbuf0,
787 const uint16_t *vbuf1, const uint16_t *abuf0,
788 const uint16_t *abuf1, uint8_t *dest, int dstW,
789 int yalpha, int uvalpha, int y,
790 enum PixelFormat target)
792 int  yalpha1 = 4095 - yalpha;
793 int uvalpha1 = 4095 - uvalpha;
796 for (i = 0; i < (dstW >> 1); i++) {
797 int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
798 int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
799 int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
800 int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
802 output_pixels(i * 4, Y1, U, Y2, V);
// Single-buffer path to packed 4:2:2.  When uvalpha < 2048 the chroma
// comes from ubuf1/vbuf1 alone (>>7); otherwise the two chroma buffers
// are averaged (>>8).  Luma is always buf0 >> 7.
806 static av_always_inline void
807 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
808 const uint16_t *ubuf0, const uint16_t *ubuf1,
809 const uint16_t *vbuf0, const uint16_t *vbuf1,
810 const uint16_t *abuf0, uint8_t *dest, int dstW,
811 int uvalpha, enum PixelFormat dstFormat,
812 int flags, int y, enum PixelFormat target)
816 if (uvalpha < 2048) {
817 for (i = 0; i < (dstW >> 1); i++) {
818 int Y1 = buf0[i * 2]     >> 7;
819 int Y2 = buf0[i * 2 + 1] >> 7;
820 int U  = ubuf1[i]        >> 7;
821 int V  = vbuf1[i]        >> 7;
823 output_pixels(i * 4, Y1, U, Y2, V);
826 for (i = 0; i < (dstW >> 1); i++) {
827 int Y1 =  buf0[i * 2]          >> 7;
828 int Y2 =  buf0[i * 2 + 1]      >> 7;
829 int U  = (ubuf0[i] + ubuf1[i]) >> 8;
830 int V  = (vbuf0[i] + vbuf1[i]) >> 8;
832 output_pixels(i * 4, Y1, U, Y2, V);
// Instantiate YUYV and UYVY output.
839 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
840 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// r_b / b_r swap the red and blue lookup tables so the same template
// serves both RGB48 and BGR48 byte orders.
842 #define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
843 #define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
// Vertical filtering to 48-bit RGB/BGR: YUV->RGB via the context's
// lookup tables, each 8-bit component duplicated into two bytes to form
// a 16-bit channel (6 bytes per pixel, 12 per macropixel).
845 static av_always_inline void
846 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
847 const int16_t **lumSrc, int lumFilterSize,
848 const int16_t *chrFilter, const int16_t **chrUSrc,
849 const int16_t **chrVSrc, int chrFilterSize,
850 const int16_t **alpSrc, uint8_t *dest, int dstW,
851 int y, enum PixelFormat target)
855 for (i = 0; i < (dstW >> 1); i++) {
861 const uint8_t *r, *g, *b;
863 for (j = 0; j < lumFilterSize; j++) {
864 Y1 += lumSrc[j][i * 2]     * lumFilter[j];
865 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
867 for (j = 0; j < chrFilterSize; j++) {
868 U += chrUSrc[j][i] * chrFilter[j];
869 V += chrVSrc[j][i] * chrFilter[j];
875 if ((Y1 | Y2 | U | V) & 0x100) {
876 Y1 = av_clip_uint8(Y1);
877 Y2 = av_clip_uint8(Y2);
878 U  = av_clip_uint8(U);
879 V  = av_clip_uint8(V);
882 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
883 r = (const uint8_t *) c->table_rV[V];
884 g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
885 b = (const uint8_t *) c->table_bU[U];
887 dest[ 0] = dest[ 1] = r_b[Y1];
888 dest[ 2] = dest[ 3] =   g[Y1];
889 dest[ 4] = dest[ 5] = b_r[Y1];
890 dest[ 6] = dest[ 7] = r_b[Y2];
891 dest[ 8] = dest[ 9] =   g[Y2];
892 dest[10] = dest[11] = b_r[Y2];
// Two-buffer (bilinear blend) path to 48-bit RGB/BGR: blend with the
// 12-bit yalpha/uvalpha weights, then the same table-lookup/duplication
// scheme as the _X template above.
897 static av_always_inline void
898 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
899 const uint16_t *buf1, const uint16_t *ubuf0,
900 const uint16_t *ubuf1, const uint16_t *vbuf0,
901 const uint16_t *vbuf1, const uint16_t *abuf0,
902 const uint16_t *abuf1, uint8_t *dest, int dstW,
903 int yalpha, int uvalpha, int y,
904 enum PixelFormat target)
906 int  yalpha1 = 4095 - yalpha;
907 int uvalpha1 = 4095 - uvalpha;
910 for (i = 0; i < (dstW >> 1); i++) {
911 int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
912 int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
913 int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
914 int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
915 const uint8_t *r = (const uint8_t *) c->table_rV[V],
916 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
917 *b = (const uint8_t *) c->table_bU[U];
919 dest[ 0] = dest[ 1] = r_b[Y1];
920 dest[ 2] = dest[ 3] =   g[Y1];
921 dest[ 4] = dest[ 5] = b_r[Y1];
922 dest[ 6] = dest[ 7] = r_b[Y2];
923 dest[ 8] = dest[ 9] =   g[Y2];
924 dest[10] = dest[11] = b_r[Y2];
// Single-buffer path to 48-bit RGB/BGR.  Chroma selection mirrors
// yuv2422_1_c_template: ubuf1/vbuf1 alone (>>7) when uvalpha < 2048,
// averaged (>>8) otherwise; luma is buf0 >> 7.
929 static av_always_inline void
930 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
931 const uint16_t *ubuf0, const uint16_t *ubuf1,
932 const uint16_t *vbuf0, const uint16_t *vbuf1,
933 const uint16_t *abuf0, uint8_t *dest, int dstW,
934 int uvalpha, enum PixelFormat dstFormat,
935 int flags, int y, enum PixelFormat target)
939 if (uvalpha < 2048) {
940 for (i = 0; i < (dstW >> 1); i++) {
941 int Y1 = buf0[i * 2]     >> 7;
942 int Y2 = buf0[i * 2 + 1] >> 7;
943 int U  = ubuf1[i]        >> 7;
944 int V  = vbuf1[i]        >> 7;
945 const uint8_t *r = (const uint8_t *) c->table_rV[V],
946 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
947 *b = (const uint8_t *) c->table_bU[U];
949 dest[ 0] = dest[ 1] = r_b[Y1];
950 dest[ 2] = dest[ 3] =   g[Y1];
951 dest[ 4] = dest[ 5] = b_r[Y1];
952 dest[ 6] = dest[ 7] = r_b[Y2];
953 dest[ 8] = dest[ 9] =   g[Y2];
954 dest[10] = dest[11] = b_r[Y2];
958 for (i = 0; i < (dstW >> 1); i++) {
959 int Y1 =  buf0[i * 2]          >> 7;
960 int Y2 =  buf0[i * 2 + 1]      >> 7;
961 int U  = (ubuf0[i] + ubuf1[i]) >> 8;
962 int V  = (vbuf0[i] + vbuf1[i]) >> 8;
963 const uint8_t *r = (const uint8_t *) c->table_rV[V],
964 *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
965 *b = (const uint8_t *) c->table_bU[U];
967 dest[ 0] = dest[ 1] = r_b[Y1];
968 dest[ 2] = dest[ 3] =   g[Y1];
969 dest[ 4] = dest[ 5] = b_r[Y1];
970 dest[ 6] = dest[ 7] = r_b[Y2];
971 dest[ 8] = dest[ 9] =   g[Y2];
972 dest[10] = dest[11] = b_r[Y2];
// Only the big-endian variants are instantiated here; the LE variants
// are commented out (presumably handled elsewhere — confirm).
981 YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
982 //YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
983 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
984 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
// Legacy helper macro: vertical filtering of two luma, one chroma pair
// and (optionally) two alpha samples per macropixel, followed by the
// YUV->RGB table lookups into r/g/b.  The macro deliberately ends
// mid-loop — the per-format pixel stores are appended by
// YSCALE_YUV_2_ANYRGB_C below.
986 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
987 for (i=0; i<(dstW>>1); i++) {\
993 int av_unused A1, A2;\
994 type av_unused *r, *b, *g;\
997 for (j=0; j<lumFilterSize; j++) {\
998 Y1 += lumSrc[j][i2] * lumFilter[j];\
999 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
1001 for (j=0; j<chrFilterSize; j++) {\
1002 U += chrUSrc[j][i] * chrFilter[j];\
1003 V += chrVSrc[j][i] * chrFilter[j];\
1009 if ((Y1|Y2|U|V)&0x100) {\
1010 Y1 = av_clip_uint8(Y1); \
1011 Y2 = av_clip_uint8(Y2); \
1012 U  = av_clip_uint8(U);  \
1013 V  = av_clip_uint8(V);  \
1018 for (j=0; j<lumFilterSize; j++) {\
1019 A1 += alpSrc[j][i2  ] * lumFilter[j];\
1020 A2 += alpSrc[j][i2+1] * lumFilter[j];\
1024 if ((A1|A2)&0x100) {\
1025 A1 = av_clip_uint8(A1); \
1026 A2 = av_clip_uint8(A2); \
1029 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
1030 r = (type *)c->table_rV[V]; \
1031 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
1032 b = (type *)c->table_bU[U];
// Full-chroma variant: one pixel per iteration, no table lookups —
// R/G/B are computed arithmetically from the context's yuv2rgb
// coefficients and clipped to 30 bits (final rounding/store is appended
// by the caller macro, like RGBX_C above).
1034 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
1035 for (i=0; i<dstW; i++) {\
1038 int U = (1<<9)-(128<<19);\
1039 int V = (1<<9)-(128<<19);\
1043 for (j=0; j<lumFilterSize; j++) {\
1044 Y += lumSrc[j][i     ] * lumFilter[j];\
1046 for (j=0; j<chrFilterSize; j++) {\
1047 U += chrUSrc[j][i] * chrFilter[j];\
1048 V += chrVSrc[j][i] * chrFilter[j];\
1055 for (j=0; j<lumFilterSize; j++)\
1056 A += alpSrc[j][i     ] * lumFilter[j];\
1059 A = av_clip_uint8(A);\
1061 Y-= c->yuv2rgb_y_offset;\
1062 Y*= c->yuv2rgb_y_coeff;\
1064 R= Y + V*c->yuv2rgb_v2r_coeff;\
1065 G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
1066 B= Y +                          U*c->yuv2rgb_u2b_coeff;\
1067 if ((R|G|B)&(0xC0000000)) {\
1068 R = av_clip_uintp2(R, 30); \
1069 G = av_clip_uintp2(G, 30); \
1070 B = av_clip_uintp2(B, 30); \
// Two-buffer (bilinear blend) variant of the legacy RGB macro family:
// Y/U/V (and alpha) blended with the 12-bit yalpha/uvalpha weights,
// then the table lookups; pixel stores appended by the caller macro.
1073 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
1074 for (i=0; i<(dstW>>1); i++) { \
1075 const int i2= 2*i;       \
1076 int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
1077 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
1078 int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
1079 int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
1080 type av_unused *r, *b, *g;                                    \
1081 int av_unused A1, A2;                                         \
1083 A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
1084 A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
1086 r = (type *)c->table_rV[V];\
1087 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1088 b = (type *)c->table_bU[U];
// Single-buffer variant, chroma from ubuf1/vbuf1 only (used when the
// chroma weight is below the blend threshold); all values >>7 from the
// 15-bit intermediates.
1090 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
1091 for (i=0; i<(dstW>>1); i++) {\
1093 int Y1= buf0[i2  ]>>7;\
1094 int Y2= buf0[i2+1]>>7;\
1095 int U= (ubuf1[i])>>7;\
1096 int V= (vbuf1[i])>>7;\
1097 type av_unused *r, *b, *g;\
1098 int av_unused A1, A2;\
1101 A2= abuf0[i2+1]>>7;\
1103 r = (type *)c->table_rV[V];\
1104 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1105 b = (type *)c->table_bU[U];
/* Like YSCALE_YUV_2_RGB1_C but averages the two chroma input lines
 * ((ubuf0+ubuf1)>>8) - used when uvalpha is near the midpoint.
 * NOTE(review): i2 declaration, A1 assignment and loop closer are missing
 * from this extraction - body left byte-identical. */
1107 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
1108     for (i=0; i<(dstW>>1); i++) {\
1110         int Y1= buf0[i2 ]>>7;\
1111         int Y2= buf0[i2+1]>>7;\
1112         int U= (ubuf0[i] + ubuf1[i])>>8;\
1113         int V= (vbuf0[i] + vbuf1[i])>>8;\
1114         type av_unused *r, *b, *g;\
1115         int av_unused A1, A2;\
1118         A2= abuf0[i2+1]>>7;\
1120         r = (type *)c->table_rV[V];\
1121         g = (type *)(c->table_gU[U] + c->table_gV[V]);\
1122         b = (type *)c->table_bU[U];
/* Output dispatcher: expands func (one of the YSCALE_YUV_2_* loop heads)
 * and writes the per-format packed pixels selected by c->dstFormat
 * (RGBA/ARGB 32 bit, 24 bit, dithered 16/15/12/8/4/1 bit).
 * NOTE(review): many original lines (case labels, break;, closing braces,
 * dither index variables) are missing from this extraction - body left
 * byte-identical on purpose. */
1124 #define YSCALE_YUV_2_ANYRGB_C(func)\
1125     switch(c->dstFormat) {\
1128         if (CONFIG_SMALL) {\
1129             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1130             func(uint32_t,needAlpha)\
1131                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
1132                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
1135             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1137                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
1138                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
1142                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1143                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1150         if (CONFIG_SMALL) {\
1151             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
1152             func(uint32_t,needAlpha)\
1153                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
1154                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
1157             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
1159                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
1160                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
1164                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
1165                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
1170     case PIX_FMT_RGB24:\
1172             ((uint8_t*)dest)[0]= r[Y1];\
1173             ((uint8_t*)dest)[1]= g[Y1];\
1174             ((uint8_t*)dest)[2]= b[Y1];\
1175             ((uint8_t*)dest)[3]= r[Y2];\
1176             ((uint8_t*)dest)[4]= g[Y2];\
1177             ((uint8_t*)dest)[5]= b[Y2];\
1181     case PIX_FMT_BGR24:\
1183             ((uint8_t*)dest)[0]= b[Y1];\
1184             ((uint8_t*)dest)[1]= g[Y1];\
1185             ((uint8_t*)dest)[2]= r[Y1];\
1186             ((uint8_t*)dest)[3]= b[Y2];\
1187             ((uint8_t*)dest)[4]= g[Y2];\
1188             ((uint8_t*)dest)[5]= r[Y2];\
1192     case PIX_FMT_RGB565:\
1193     case PIX_FMT_BGR565:\
1195             const int dr1= dither_2x2_8[y&1    ][0];\
1196             const int dg1= dither_2x2_4[y&1    ][0];\
1197             const int db1= dither_2x2_8[(y&1)^1][0];\
1198             const int dr2= dither_2x2_8[y&1    ][1];\
1199             const int dg2= dither_2x2_4[y&1    ][1];\
1200             const int db2= dither_2x2_8[(y&1)^1][1];\
1202                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1203                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1207     case PIX_FMT_RGB555:\
1208     case PIX_FMT_BGR555:\
1210             const int dr1= dither_2x2_8[y&1    ][0];\
1211             const int dg1= dither_2x2_8[y&1    ][1];\
1212             const int db1= dither_2x2_8[(y&1)^1][0];\
1213             const int dr2= dither_2x2_8[y&1    ][1];\
1214             const int dg2= dither_2x2_8[y&1    ][0];\
1215             const int db2= dither_2x2_8[(y&1)^1][1];\
1217                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1218                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1222     case PIX_FMT_RGB444:\
1223     case PIX_FMT_BGR444:\
1225             const int dr1= dither_4x4_16[y&3    ][0];\
1226             const int dg1= dither_4x4_16[y&3    ][1];\
1227             const int db1= dither_4x4_16[(y&3)^3][0];\
1228             const int dr2= dither_4x4_16[y&3    ][1];\
1229             const int dg2= dither_4x4_16[y&3    ][0];\
1230             const int db2= dither_4x4_16[(y&3)^3][1];\
1232                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1233                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1240             const uint8_t * const d64= dither_8x8_73[y&7];\
1241             const uint8_t * const d32= dither_8x8_32[y&7];\
1243                 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
1244                 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
1251             const uint8_t * const d64= dither_8x8_73 [y&7];\
1252             const uint8_t * const d128=dither_8x8_220[y&7];\
1254                 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
1255                                    + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
1259     case PIX_FMT_RGB4_BYTE:\
1260     case PIX_FMT_BGR4_BYTE:\
1262             const uint8_t * const d64= dither_8x8_73 [y&7];\
1263             const uint8_t * const d128=dither_8x8_220[y&7];\
1265                 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
1266                 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
/* Multi-tap vertical scaling to any packed RGB/BGR output format.
 * Thin driver around YSCALE_YUV_2_ANYRGB_C with the RGBX per-pixel core.
 * NOTE(review): function braces are missing from this extraction. */
1272 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
1273                           const int16_t **lumSrc, int lumFilterSize,
1274                           const int16_t *chrFilter, const int16_t **chrUSrc,
1275                           const int16_t **chrVSrc, int chrFilterSize,
1276                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1279     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
/* Full-chroma-interpolation variant of yuv2packedX_c: computes R,G,B per
 * output pixel (no 2-pixel chroma sharing) and writes them with a per-format
 * byte layout; `step` is the destination bytes-per-pixel.
 * NOTE(review): the case labels, R/G/B store statements and closing braces
 * are missing from this extraction - body left byte-identical. */
1282 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
1283                             const int16_t **lumSrc, int lumFilterSize,
1284                             const int16_t *chrFilter, const int16_t **chrUSrc,
1285                             const int16_t **chrVSrc, int chrFilterSize,
1286                             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1289     int step= c->dstFormatBpp/8;
1292     switch(c->dstFormat) {
1300             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1301             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1302                 dest[aidx]= needAlpha ? A : 255;
1309             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1310                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1318                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1335             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1336             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1337                 dest[aidx]= needAlpha ? A : 255;
1344             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1345                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1353                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1369 * vertical bilinear scale YV12 to RGB
/* Vertical bilinear (two-line) scaling to packed output; yalpha/uvalpha are
 * the 0..4095 interpolation weights toward buf1/ubuf1/vbuf1.
 * NOTE(review): function braces are missing from this extraction. */
1371 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
1372                           const uint16_t *buf1, const uint16_t *ubuf0,
1373                           const uint16_t *ubuf1, const uint16_t *vbuf0,
1374                           const uint16_t *vbuf1, const uint16_t *abuf0,
1375                           const uint16_t *abuf1, uint8_t *dest, int dstW,
1376                           int yalpha, int uvalpha, int y)
1378     int  yalpha1=4095- yalpha;
1379     int uvalpha1=4095-uvalpha;
1382     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
1386 * YV12 to RGB without scaling or interpolating
1388 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
1389 const uint16_t *ubuf0, const uint16_t *ubuf1,
1390 const uint16_t *vbuf0, const uint16_t *vbuf1,
1391 const uint16_t *abuf0, uint8_t *dest, int dstW,
1392 int uvalpha, enum PixelFormat dstFormat,
1397 if (uvalpha < 2048) {
1398 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
1400 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
1404 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1405 int width, int height,
1409 uint8_t *ptr = plane + stride*y;
1410 for (i=0; i<height; i++) {
1411 memset(ptr, val, width);
/* Helpers for the rgb48 readers below: input_pixel loads one 16-bit
 * component honouring the source endianness; r/b swap the first/last
 * component for BGR48 layouts so the templates can be shared. */
1416 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1418 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1419 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1421 static av_always_inline void
1422 rgb48ToY_c_template(int16_t *dst, const uint16_t *src, int width,
1423 enum PixelFormat origin)
1426 for (i = 0; i < width; i++) {
1427 int r_b = input_pixel(&src[i*3+0]);
1428 int g = input_pixel(&src[i*3+1]);
1429 int b_r = input_pixel(&src[i*3+2]);
1431 dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
1435 static av_always_inline void
1436 rgb48ToUV_c_template(int16_t *dstU, int16_t *dstV,
1437 const uint16_t *src1, const uint16_t *src2,
1438 int width, enum PixelFormat origin)
1442 for (i = 0; i < width; i++) {
1443 int r_b = input_pixel(&src1[i*3+0]);
1444 int g = input_pixel(&src1[i*3+1]);
1445 int b_r = input_pixel(&src1[i*3+2]);
1447 dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
1448 dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
1452 static av_always_inline void
1453 rgb48ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1454 const uint16_t *src1, const uint16_t *src2,
1455 int width, enum PixelFormat origin)
1459 for (i = 0; i < width; i++) {
1460 int r_b = (input_pixel(&src1[6*i + 0])) + (input_pixel(&src1[6*i + 3]));
1461 int g = (input_pixel(&src1[6*i + 1])) + (input_pixel(&src1[6*i + 4]));
1462 int b_r = (input_pixel(&src1[6*i + 2])) + (input_pixel(&src1[6*i + 5]));
1464 dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);
1465 dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);
/* Instantiate the rgb48 Y/UV/UV_half readers for one pattern+endianness.
 * The wrappers expose the generic uint8_t* reader signature and cast to the
 * 16-bit types the templates expect.
 * NOTE(review): restored from a truncated extraction - braces and
 * continuation lines were missing; casts added to match the int16_t*/
/* uint16_t* template parameters. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
                                    int width, uint32_t *unused) \
{ \
    rgb48ToY_c_template((int16_t *) dst, (const uint16_t *) src, width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                    const uint8_t *src1, const uint8_t *src2, \
                                    int width, uint32_t *unused) \
{ \
    rgb48ToUV_c_template((int16_t *) dstU, (int16_t *) dstV, \
                         (const uint16_t *) src1, (const uint16_t *) src2, \
                         width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                    const uint8_t *src1, const uint8_t *src2, \
                                    int width, uint32_t *unused) \
{ \
    rgb48ToUV_half_c_template((int16_t *) dstU, (int16_t *) dstV, \
                              (const uint16_t *) src1, (const uint16_t *) src2, \
                              width, origin); \
}
/* Instantiate the four endianness/component-order combinations. */
1494 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1495 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1496 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1497 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Loader for the 16/32-bit packed RGB readers below: 32-bit formats use a
 * native aligned load, 16-bit formats an endianness-aware 16-bit load. */
1499 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1500                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1501                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1503 static av_always_inline void
1504 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1505 int width, enum PixelFormat origin,
1506 int shr, int shg, int shb, int shp,
1507 int maskr, int maskg, int maskb,
1508 int rsh, int gsh, int bsh, int S)
1510 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1511 rnd = (32<<((S)-1)) + (1<<(S-7));
1514 for (i = 0; i < width; i++) {
1515 int px = input_pixel(i) >> shp;
1516 int b = (px & maskb) >> shb;
1517 int g = (px & maskg) >> shg;
1518 int r = (px & maskr) >> shr;
1520 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
1524 static av_always_inline void
1525 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1526 const uint8_t *src, int width,
1527 enum PixelFormat origin,
1528 int shr, int shg, int shb, int shp,
1529 int maskr, int maskg, int maskb,
1530 int rsh, int gsh, int bsh, int S)
1532 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1533 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1534 rnd = (256<<((S)-1)) + (1<<(S-7));
1537 for (i = 0; i < width; i++) {
1538 int px = input_pixel(i) >> shp;
1539 int b = (px & maskb) >> shb;
1540 int g = (px & maskg) >> shg;
1541 int r = (px & maskr) >> shr;
1543 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1544 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
1548 static av_always_inline void
1549 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1550 const uint8_t *src, int width,
1551 enum PixelFormat origin,
1552 int shr, int shg, int shb, int shp,
1553 int maskr, int maskg, int maskb,
1554 int rsh, int gsh, int bsh, int S)
1556 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1557 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1558 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
1561 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1562 for (i = 0; i < width; i++) {
1563 int px0 = input_pixel(2 * i + 0) >> shp;
1564 int px1 = input_pixel(2 * i + 1) >> shp;
1565 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1566 int rb = px0 + px1 - g;
1568 b = (rb & maskb) >> shb;
1569 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1570 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1573 g = (g & maskg) >> shg;
1575 r = (rb & maskr) >> shr;
1577 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1578 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Instantiate ToY/ToUV/ToUV_half readers for one packed RGB layout.
 * NOTE(review): restored from a truncated extraction - braces and
 * continuation lines were missing; dst casts added to match the int16_t*
 * template parameters. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
{ \
    rgb16_32ToY_c_template((int16_t *) dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_c_template((int16_t *) dstU, (int16_t *) dstV, src, width, fmt, \
                            shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_half_c_template((int16_t *) dstU, (int16_t *) dstV, src, width, fmt, \
                                 shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}
/* One instantiation per supported 32/16/15-bit packed layout; the mask and
 * shift columns encode each format's component placement. */
1609 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1610 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
1611 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1612 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
1613 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1614 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1615 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1616 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
1617 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
1618 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
1619 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
1620 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
/**
 * Extract the alpha channel of ABGR/ARGB-style data (alpha is byte 0 of
 * each 4-byte pixel) into 14-bit samples (<<6).
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i]<<6;
    }
}
/**
 * Extract the alpha channel of RGBA/BGRA-style data (alpha is byte 3 of
 * each 4-byte pixel) into 14-bit samples (<<6).
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i+3]<<6;
    }
}
/**
 * Extract alpha (top byte of the 32-bit palette entry) for PAL8 input into
 * 14-bit samples (<<6).
 * NOTE(review): restored from a truncated extraction - braces, `int i;` and
 * the `int d= src[i];` index load were missing.
 */
static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i]; // palette index
        dst[i]= (pal[d] >> 24)<<6;
    }
}
/**
 * Extract luma (low byte of the 32-bit palette entry) for PAL8 input into
 * 14-bit samples (<<6).  `width` kept as long to preserve the original ABI.
 * NOTE(review): restored from a truncated extraction - braces, `int i;` and
 * the `int d= src[i];` index load were missing.
 */
static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i]; // palette index
        dst[i]= (pal[d] & 0xFF)<<6;
    }
}
/**
 * Extract chroma for PAL8 input: U is byte 1 and V is byte 2 of the 32-bit
 * palette entry, widened to 14 bits (<<6).  src1 and src2 must alias.
 * NOTE(review): restored from a truncated extraction - braces, `int i;` and
 * the palette load were missing.  The dstU/dstV type mismatch
 * (uint16_t* vs int16_t*) is kept as found - TODO confirm against callers.
 */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= (uint8_t)(p>> 8)<<6;
        dstV[i]= (uint8_t)(p>>16)<<6;
    }
}
/**
 * Expand 1 bpp monowhite data (0 = white) to 14-bit luma: each bit becomes
 * 0 or 16383.  The source byte is inverted so a set bit means black.
 * NOTE(review): restored from a truncated extraction - declarations, the
 * inner-loop headers, the `~src[i]` loads and the width&7 tail guard were
 * missing.
 */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i]; // invert: monowhite stores white as 0
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
    if(width&7){
        int d= ~src[i]; // leftover pixels of the last partial byte
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
}
/**
 * Expand 1 bpp monoblack data (0 = black) to 14-bit luma: each set bit
 * becomes 16383.
 * NOTE(review): restored from a truncated extraction - declarations, the
 * inner-loop headers, the `src[i]` loads and the width&7 tail guard were
 * missing.
 */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
    if(width&7){
        int d= src[i]; // leftover pixels of the last partial byte
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
}
1702 //FIXME yuy2* can read up to 7 samples too much
/**
 * Copy the luma samples out of YUY2 (Y U Y V) packed data: Y is every even
 * byte.
 * NOTE(review): restored from a truncated extraction - the trailing
 * `uint32_t *unused` parameter, braces, `int i;` and the loop body were
 * missing.
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/**
 * Extract chroma from YUY2 (Y U Y V): U at byte 1 and V at byte 3 of each
 * 4-byte pair.  src1 and src2 must alias the same line.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/**
 * Read the high byte of little-endian 16-bit chroma samples from two
 * separate planes (U from src1, V from src2).
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1]; // high byte of an LE 16-bit sample
        dstV[i]= src2[2*i + 1];
    }
}
1733 /* This is almost identical to the previous, and exists only because
1734  * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
/**
 * Copy the luma samples out of UYVY (U Y V Y) packed data: Y is every odd
 * byte.
 * NOTE(review): restored from a truncated extraction - the trailing
 * `uint32_t *unused` parameter, braces, `int i;` and the loop body were
 * missing.
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i+1];
}
/**
 * Extract chroma from UYVY (U Y V Y): U at byte 0 and V at byte 2 of each
 * 4-byte pair.  src1 and src2 must alias the same line.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
/**
 * Read the high byte of big-endian 16-bit chroma samples from two separate
 * planes (U from src1, V from src2); counterpart of LEToUV_c.
 * NOTE(review): restored from a truncated extraction - braces, `int i;` and
 * the loop body were missing; reconstructed as the BE mirror of LEToUV_c
 * (high byte first).
 */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i]; // high byte of a BE 16-bit sample
        dstV[i]= src2[2*i];
    }
}
1764 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1765 const uint8_t *src, int width)
1768 for (i = 0; i < width; i++) {
1769 dst1[i] = src[2*i+0];
1770 dst2[i] = src[2*i+1];
/**
 * NV12 chroma reader: interleaved data is U,V,U,V..., so even bytes are U.
 * NOTE(review): restored from a truncated extraction - function braces were
 * missing.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/**
 * NV21 chroma reader: interleaved data is V,U,V,U..., so the destination
 * planes are swapped relative to nv12ToUV_c.
 * NOTE(review): restored from a truncated extraction - function braces were
 * missing.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
/* Endianness-aware 16-bit sample loader for the 9/10-bit YUV readers. */
1788 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1790 // FIXME Maybe dither instead.
1791 static av_always_inline void
1792 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1793 const uint8_t *_srcU, const uint8_t *_srcV,
1794 int width, enum PixelFormat origin, int depth)
1797 const uint16_t *srcU = (const uint16_t *) _srcU;
1798 const uint16_t *srcV = (const uint16_t *) _srcV;
1800 for (i = 0; i < width; i++) {
1801 dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1802 dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1806 static av_always_inline void
1807 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1808 int width, enum PixelFormat origin, int depth)
1811 const uint16_t *srcY = (const uint16_t*)_srcY;
1813 for (i = 0; i < width; i++)
1814 dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
/* Instantiate 8-bit downconverting Y/UV readers for one 9/10-bit format.
 * NOTE(review): restored from a truncated extraction - braces and
 * continuation lines were missing. */
#define YUV_NBPS(depth, BE_LE, origin) \
static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                     const uint8_t *srcU, const uint8_t *srcV, \
                                     int width, uint32_t *unused) \
{ \
    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
} \
 \
static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
                                    int width, uint32_t *unused) \
{ \
    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
}
/* 9- and 10-bit planar YUV readers, both endiannesses. */
1832 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1833 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1834 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1835 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
1837 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1838 int width, uint32_t *unused)
1841 for (i=0; i<width; i++) {
1846 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1850 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1851 const uint8_t *src2, int width, uint32_t *unused)
1854 for (i=0; i<width; i++) {
1855 int b= src1[3*i + 0];
1856 int g= src1[3*i + 1];
1857 int r= src1[3*i + 2];
1859 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1860 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1862 assert(src1 == src2);
1865 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1866 const uint8_t *src2, int width, uint32_t *unused)
1869 for (i=0; i<width; i++) {
1870 int b= src1[6*i + 0] + src1[6*i + 3];
1871 int g= src1[6*i + 1] + src1[6*i + 4];
1872 int r= src1[6*i + 2] + src1[6*i + 5];
1874 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1875 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1877 assert(src1 == src2);
1880 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
1884 for (i=0; i<width; i++) {
1889 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1893 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1894 const uint8_t *src2, int width, uint32_t *unused)
1898 for (i=0; i<width; i++) {
1899 int r= src1[3*i + 0];
1900 int g= src1[3*i + 1];
1901 int b= src1[3*i + 2];
1903 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1904 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1908 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1909 const uint8_t *src2, int width, uint32_t *unused)
1913 for (i=0; i<width; i++) {
1914 int r= src1[6*i + 0] + src1[6*i + 3];
1915 int g= src1[6*i + 1] + src1[6*i + 4];
1916 int b= src1[6*i + 2] + src1[6*i + 5];
1918 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1919 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1923 // bilinear / bicubic scaling
/**
 * Generic horizontal scaler: for every output pixel accumulate filterSize
 * taps starting at filterPos[i], then >>7 and clip to 15 bits.
 * NOTE(review): restored from a truncated extraction - the `int filterSize`
 * parameter, braces, the i/j/val declarations were missing.
 */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int i;
    for (i=0; i<dstW; i++) {
        int j;
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        //filter += hFilterSize;
        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
    }
}
/**
 * 16-bit input variant of hScale_c; `shift` replaces the fixed >>7 so the
 * extra input precision can be compensated.  srcW/xInc are unused here but
 * kept for signature compatibility with the function pointer.
 * NOTE(review): restored from a truncated extraction - braces and the
 * i/j/val declarations were missing.
 */
static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                              const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int i, j;

    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
    }
}
/**
 * Byte-swapping variant of hScale16_c for opposite-endian 16-bit input.
 * NOTE(review): restored from a truncated extraction - braces and the
 * i/j/val declarations were missing.
 */
static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                               const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int i, j;

    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
        }
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
    }
}
1971 //FIXME all pal and rgb srcFormats could do this conversion as well
1972 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Expand limited-range (MPEG) chroma to full range (JPEG) in-place on the
 * 15-bit intermediate samples; input is clamped to avoid overflow.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/**
 * Compress full-range (JPEG) chroma to limited range (MPEG) in-place on the
 * 15-bit intermediate samples.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
    }
}
/**
 * Expand limited-range luma to full range in-place on the 15-bit
 * intermediate samples; input is clamped to avoid overflow.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
/**
 * Compress full-range luma to limited range in-place on the 15-bit
 * intermediate samples.
 * NOTE(review): restored from a truncated extraction - braces and `int i;`
 * were missing.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
}
2002 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2003 const uint8_t *src, int srcW, int xInc)
2006 unsigned int xpos=0;
2007 for (i=0;i<dstWidth;i++) {
2008 register unsigned int xx=xpos>>16;
2009 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2010 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2013 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2014 dst[i] = src[srcW-1]*128;
2017 // *** horizontal scale Y line to temp buffer
/* Horizontal scale of one luma (or alpha, if isAlpha) line: optional input
 * conversion to YV12 via formatConvBuffer, then 16-bit/generic/fast-bilinear
 * horizontal scaling, then optional range conversion.
 * NOTE(review): the guard conditions around the conversion and the
 * convertRange call, plus closing braces, are missing from this
 * extraction - body left byte-identical. */
2018 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
2019                                      const uint8_t *src, int srcW, int xInc,
2020                                      const int16_t *hLumFilter,
2021                                      const int16_t *hLumFilterPos, int hLumFilterSize,
2022                                      uint8_t *formatConvBuffer,
2023                                      uint32_t *pal, int isAlpha)
2025     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2026     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2029         toYV12(formatConvBuffer, src, srcW, pal);
2030         src= formatConvBuffer;
2034         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2035         c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
2036     } else if (!c->hyscale_fast) {
2037         c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2038     } else { // fast bilinear upscale / crap downscale
2039         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2043         convertRange(dst, dstWidth);
2046 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2047 int dstWidth, const uint8_t *src1,
2048 const uint8_t *src2, int srcW, int xInc)
2051 unsigned int xpos=0;
2052 for (i=0;i<dstWidth;i++) {
2053 register unsigned int xx=xpos>>16;
2054 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2055 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2056 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2059 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2060 dst1[i] = src1[srcW-1]*128;
2061 dst2[i] = src2[srcW-1]*128;
/* Horizontal scale of one chroma line pair (U into dst1, V into dst2):
 * optional input conversion via formatConvBuffer/buf2, then 16-bit/generic/
 * fast-bilinear horizontal scaling, then optional chroma range conversion.
 * NOTE(review): the guard around the conversion, the `src2= buf2;`
 * assignment and closing braces are missing from this extraction - body
 * left byte-identical. */
2065 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
2066                                      const uint8_t *src1, const uint8_t *src2,
2067                                      int srcW, int xInc, const int16_t *hChrFilter,
2068                                      const int16_t *hChrFilterPos, int hChrFilterSize,
2069                                      uint8_t *formatConvBuffer, uint32_t *pal)
2072         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2073         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2074         src1= formatConvBuffer;
2079         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2080         c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2081         c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2082     } else if (!c->hcscale_fast) {
2083         c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2084         c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2085     } else { // fast bilinear upscale / crap downscale
2086         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2089     if (c->chrConvertRange)
2090         c->chrConvertRange(dst1, dst2, dstWidth);
/* Select the C output functions (planar 1-tap/X-tap and packed 1-tap/2-tap/
 * X-tap) for the context's destination format.  Full-chroma interpolation
 * forces the yuv2rgbX_c_full path; 16-bit RGB/BGR little-endian entries are
 * commented out (not implemented in C here).
 * NOTE(review): several break;/else/case lines and closing braces are
 * missing from this extraction - body left byte-identical. */
2093 static av_always_inline void
2094 find_c_packed_planar_out_funcs(SwsContext *c,
2095                                yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2096                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2097                                yuv2packedX_fn *yuv2packedX)
2099     enum PixelFormat dstFormat = c->dstFormat;
2101     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2102         *yuv2yuvX     = yuv2nv12X_c;
2103     } else if (is16BPS(dstFormat)) {
2104         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
2105     } else if (is9_OR_10BPS(dstFormat)) {
2106         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2107             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
2109             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2112         *yuv2yuv1     = yuv2yuv1_c;
2113         *yuv2yuvX     = yuv2yuvX_c;
2115     if(c->flags & SWS_FULL_CHR_H_INT) {
2116         *yuv2packedX = yuv2rgbX_c_full;
2118         switch (dstFormat) {
2119         case PIX_FMT_GRAY16BE:
2120             *yuv2packed1 = yuv2gray16BE_1_c;
2121             *yuv2packed2 = yuv2gray16BE_2_c;
2122             *yuv2packedX = yuv2gray16BE_X_c;
2124         case PIX_FMT_GRAY16LE:
2125             *yuv2packed1 = yuv2gray16LE_1_c;
2126             *yuv2packed2 = yuv2gray16LE_2_c;
2127             *yuv2packedX = yuv2gray16LE_X_c;
2129         case PIX_FMT_MONOWHITE:
2130             *yuv2packed1 = yuv2monowhite_1_c;
2131             *yuv2packed2 = yuv2monowhite_2_c;
2132             *yuv2packedX = yuv2monowhite_X_c;
2134         case PIX_FMT_MONOBLACK:
2135             *yuv2packed1 = yuv2monoblack_1_c;
2136             *yuv2packed2 = yuv2monoblack_2_c;
2137             *yuv2packedX = yuv2monoblack_X_c;
2139         case PIX_FMT_YUYV422:
2140             *yuv2packed1 = yuv2yuyv422_1_c;
2141             *yuv2packed2 = yuv2yuyv422_2_c;
2142             *yuv2packedX = yuv2yuyv422_X_c;
2144         case PIX_FMT_UYVY422:
2145             *yuv2packed1 = yuv2uyvy422_1_c;
2146             *yuv2packed2 = yuv2uyvy422_2_c;
2147             *yuv2packedX = yuv2uyvy422_X_c;
2149         case PIX_FMT_RGB48LE:
2150             //*yuv2packed1 = yuv2rgb48le_1_c;
2151             //*yuv2packed2 = yuv2rgb48le_2_c;
2152             //*yuv2packedX = yuv2rgb48le_X_c;
2154         case PIX_FMT_RGB48BE:
2155             *yuv2packed1 = yuv2rgb48be_1_c;
2156             *yuv2packed2 = yuv2rgb48be_2_c;
2157             *yuv2packedX = yuv2rgb48be_X_c;
2159         case PIX_FMT_BGR48LE:
2160             //*yuv2packed1 = yuv2bgr48le_1_c;
2161             //*yuv2packed2 = yuv2bgr48le_2_c;
2162             //*yuv2packedX = yuv2bgr48le_X_c;
2164         case PIX_FMT_BGR48BE:
2165             *yuv2packed1 = yuv2bgr48be_1_c;
2166             *yuv2packed2 = yuv2bgr48be_2_c;
2167             *yuv2packedX = yuv2bgr48be_X_c;
2170             *yuv2packed1 = yuv2packed1_c;
2171             *yuv2packed2 = yuv2packed2_c;
2172             *yuv2packedX = yuv2packedX_c;
/* Compile-time switch for buffer-management tracing in swScale(); the
 * av_log call is compiled out when DEBUG_SWSCALE_BUFFERS is 0. */
2178 #define DEBUG_SWSCALE_BUFFERS 0
2179 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2181 static int swScale(SwsContext *c, const uint8_t* src[],
2182 int srcStride[], int srcSliceY,
2183 int srcSliceH, uint8_t* dst[], int dstStride[])
2185 /* load a few things into local vars to make the code more readable? and faster */
2186 const int srcW= c->srcW;
2187 const int dstW= c->dstW;
2188 const int dstH= c->dstH;
2189 const int chrDstW= c->chrDstW;
2190 const int chrSrcW= c->chrSrcW;
2191 const int lumXInc= c->lumXInc;
2192 const int chrXInc= c->chrXInc;
2193 const enum PixelFormat dstFormat= c->dstFormat;
2194 const int flags= c->flags;
2195 int16_t *vLumFilterPos= c->vLumFilterPos;
2196 int16_t *vChrFilterPos= c->vChrFilterPos;
2197 int16_t *hLumFilterPos= c->hLumFilterPos;
2198 int16_t *hChrFilterPos= c->hChrFilterPos;
2199 int16_t *vLumFilter= c->vLumFilter;
2200 int16_t *vChrFilter= c->vChrFilter;
2201 int16_t *hLumFilter= c->hLumFilter;
2202 int16_t *hChrFilter= c->hChrFilter;
2203 int32_t *lumMmxFilter= c->lumMmxFilter;
2204 int32_t *chrMmxFilter= c->chrMmxFilter;
2205 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2206 const int vLumFilterSize= c->vLumFilterSize;
2207 const int vChrFilterSize= c->vChrFilterSize;
2208 const int hLumFilterSize= c->hLumFilterSize;
2209 const int hChrFilterSize= c->hChrFilterSize;
2210 int16_t **lumPixBuf= c->lumPixBuf;
2211 int16_t **chrUPixBuf= c->chrUPixBuf;
2212 int16_t **chrVPixBuf= c->chrVPixBuf;
2213 int16_t **alpPixBuf= c->alpPixBuf;
2214 const int vLumBufSize= c->vLumBufSize;
2215 const int vChrBufSize= c->vChrBufSize;
2216 uint8_t *formatConvBuffer= c->formatConvBuffer;
2217 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2218 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2220 uint32_t *pal=c->pal_yuv;
2221 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2222 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2223 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2224 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2225 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2226 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2228 /* vars which will change and which we need to store back in the context */
2230 int lumBufIndex= c->lumBufIndex;
2231 int chrBufIndex= c->chrBufIndex;
2232 int lastInLumBuf= c->lastInLumBuf;
2233 int lastInChrBuf= c->lastInChrBuf;
2235 if (isPacked(c->srcFormat)) {
2243 srcStride[3]= srcStride[0];
2245 srcStride[1]<<= c->vChrDrop;
2246 srcStride[2]<<= c->vChrDrop;
2248 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2249 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2250 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2251 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2252 srcSliceY, srcSliceH, dstY, dstH);
2253 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2254 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2256 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2257 static int warnedAlready=0; //FIXME move this into the context perhaps
2258 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2259 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2260 " ->cannot do aligned memory accesses anymore\n");
2265 /* Note the user might start scaling the picture in the middle so this
2266 will not get executed. This is not really intended but works
2267 currently, so people might do it. */
2268 if (srcSliceY ==0) {
2278 for (;dstY < dstH; dstY++) {
2279 unsigned char *dest =dst[0]+dstStride[0]*dstY;
2280 const int chrDstY= dstY>>c->chrDstVSubSample;
2281 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2282 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
2283 unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2284 const uint8_t *lumDither= should_dither ? dithers[7][dstY &7] : flat64;
2285 const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
2287 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2288 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2289 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2290 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2291 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2292 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2295 //handle holes (FAST_BILINEAR & weird filters)
2296 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2297 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2298 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2299 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2301 DEBUG_BUFFERS("dstY: %d\n", dstY);
2302 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2303 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2304 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2305 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2307 // Do we have enough lines in this slice to output the dstY line
2308 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2310 if (!enough_lines) {
2311 lastLumSrcY = srcSliceY + srcSliceH - 1;
2312 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2313 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2314 lastLumSrcY, lastChrSrcY);
2317 //Do horizontal scaling
2318 while(lastInLumBuf < lastLumSrcY) {
2319 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2320 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2322 assert(lumBufIndex < 2*vLumBufSize);
2323 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2324 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2325 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2326 hLumFilter, hLumFilterPos, hLumFilterSize,
2329 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2330 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2331 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2335 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2336 lumBufIndex, lastInLumBuf);
2338 while(lastInChrBuf < lastChrSrcY) {
2339 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2340 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2342 assert(chrBufIndex < 2*vChrBufSize);
2343 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2344 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2345 //FIXME replace parameters through context struct (some at least)
2347 if (c->needs_hcscale)
2348 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2349 chrDstW, src1, src2, chrSrcW, chrXInc,
2350 hChrFilter, hChrFilterPos, hChrFilterSize,
2351 formatConvBuffer, pal);
2353 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2354 chrBufIndex, lastInChrBuf);
2356 //wrap buf index around to stay inside the ring buffer
2357 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2358 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2360 break; //we can't output a dstY line so let's try with the next slice
2363 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2365 if (dstY >= dstH-2) {
2366 // hmm looks like we can't use MMX here without overwriting this array's tail
2367 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2368 &yuv2packed1, &yuv2packed2,
2373 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2374 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2375 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2376 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2378 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2379 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2380 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
2381 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2382 const int16_t *lumBuf = lumSrcPtr[0];
2383 const int16_t *chrUBuf= chrUSrcPtr[0];
2384 const int16_t *chrVBuf= chrVSrcPtr[0];
2385 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2386 yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
2387 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
2388 } else { //General YV12
2390 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2391 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
2392 chrVSrcPtr, vChrFilterSize,
2393 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
2396 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2397 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2398 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2399 int chrAlpha= vChrFilter[2*dstY+1];
2400 yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
2401 *chrVSrcPtr, *(chrVSrcPtr+1),
2402 alpPixBuf ? *alpSrcPtr : NULL,
2403 dest, dstW, chrAlpha, dstFormat, flags, dstY);
2404 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2405 int lumAlpha= vLumFilter[2*dstY+1];
2406 int chrAlpha= vChrFilter[2*dstY+1];
2408 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
2410 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
2411 yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
2412 *chrVSrcPtr, *(chrVSrcPtr+1),
2413 alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2414 dest, dstW, lumAlpha, chrAlpha, dstY);
2415 } else { //general RGB
2417 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2418 vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2419 alpSrcPtr, dest, dstW, dstY);
2425 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2426 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2429 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2430 __asm__ volatile("sfence":::"memory");
2434 /* store changed local vars back in the context */
2436 c->lumBufIndex= lumBufIndex;
2437 c->chrBufIndex= chrBufIndex;
2438 c->lastInLumBuf= lastInLumBuf;
2439 c->lastInChrBuf= lastInChrBuf;
2441 return dstY - lastDstY;
/* Install the C (reference) code paths into a SwsContext: vertical output
 * functions, horizontal scalers, per-pixel-format input unpackers
 * (lumToYV12 / chrToYV12 / alpToYV12), the high-bit-depth horizontal
 * scaler, range converters and the needs_hcscale flag.
 * NOTE(review): several switch openers and closing braces are elided from
 * this chunk; only comments are added, code lines are unchanged. */
2444 static av_cold void sws_init_swScale_c(SwsContext *c)
2446 enum PixelFormat srcFormat = c->srcFormat;
/* Pick the vertical output functions (planar/packed, 1-, 2- or X-tap). */
2448 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2449 &c->yuv2packed1, &c->yuv2packed2,
2452 c->hScale = hScale_c;
/* Fast (lower quality) bilinear horizontal scalers, opt-in via flag. */
2454 if (c->flags & SWS_FAST_BILINEAR) {
2455 c->hyscale_fast = hyscale_fast_c;
2456 c->hcscale_fast = hcscale_fast_c;
/* Chroma unpacker: convert the source format's chroma to planar U/V.
 * (The switch (srcFormat) opener for the cases below is elided.) */
2459 c->chrToYV12 = NULL;
2461 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2462 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2463 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2464 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
/* 4-bit-per-byte formats go through the palette lookup path. */
2468 case PIX_FMT_BGR4_BYTE:
2469 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* High-bit-depth planar/gray sources: native endianness uses the plain
 * 16-bit horizontal scaler, the opposite endianness the byte-swapping
 * variant (hScale16X_c). */
2470 case PIX_FMT_GRAY16BE :
2471 case PIX_FMT_YUV444P9BE:
2472 case PIX_FMT_YUV420P9BE:
2473 case PIX_FMT_YUV444P10BE:
2474 case PIX_FMT_YUV422P10BE:
2475 case PIX_FMT_YUV420P10BE:
2476 case PIX_FMT_YUV420P16BE:
2477 case PIX_FMT_YUV422P16BE:
2478 case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
2479 case PIX_FMT_GRAY16LE :
2480 case PIX_FMT_YUV444P9LE:
2481 case PIX_FMT_YUV420P9LE:
2482 case PIX_FMT_YUV422P10LE:
2483 case PIX_FMT_YUV420P10LE:
2484 case PIX_FMT_YUV444P10LE:
2485 case PIX_FMT_YUV420P16LE:
2486 case PIX_FMT_YUV422P16LE:
2487 case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
/* RGB-family chroma unpackers. With horizontal chroma subsampling use
 * the *_half_ variants, which read two source pixels per chroma sample. */
2489 if (c->chrSrcHSubSample) {
2491 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2492 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2493 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2494 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2495 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2496 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2497 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2498 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2499 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2500 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2501 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2502 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2503 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2504 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2505 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2506 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2507 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2508 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
/* No horizontal subsampling: one source pixel per chroma sample. */
2512 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2513 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2514 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2515 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2516 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2517 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2518 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2519 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2520 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2521 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2522 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2523 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2524 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2525 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2526 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2527 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2528 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2529 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* Luma (and, further below, alpha) unpackers to 8-bit Y. */
2533 c->lumToYV12 = NULL;
2534 c->alpToYV12 = NULL;
2535 switch (srcFormat) {
2536 case PIX_FMT_YUYV422 :
/* GRAY8A shares yuy2ToY_c: luma occupies every second byte in both. */
2537 case PIX_FMT_GRAY8A :
2538 c->lumToYV12 = yuy2ToY_c; break;
2539 case PIX_FMT_UYVY422 :
2540 c->lumToYV12 = uyvyToY_c; break;
2541 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2542 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2543 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2544 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2545 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2546 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2547 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2548 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2549 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2550 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2554 case PIX_FMT_BGR4_BYTE:
2555 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2556 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2557 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2558 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2559 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2560 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2561 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2562 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2563 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2564 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2565 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
/* Alpha-plane unpacker, when the source carries alpha. */
2568 switch (srcFormat) {
2570 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2572 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
/* Y400A: alpha interleaved like UYVY luma, so uyvyToY_c extracts it —
 * presumably intentional reuse; confirm against the layout. */
2573 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2574 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
/* RGB/paletted inputs always take the plain (native) 16-bit hScale. */
2578 if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
2579 c->hScale16= hScale16_c;
/* Limited<->full range converters; skipped for RGB destinations
 * (presumably handled inside the yuv2rgb tables — confirm). */
2581 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2583 c->lumConvertRange = lumRangeFromJpeg_c;
2584 c->chrConvertRange = chrRangeFromJpeg_c;
2586 c->lumConvertRange = lumRangeToJpeg_c;
2587 c->chrConvertRange = chrRangeToJpeg_c;
/* Gray and 1-bit mono sources have no chroma plane to scale. */
2591 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2592 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2593 c->needs_hcscale = 1;
/* Select the scaling function for this context: install the C reference
 * paths first, then let arch-specific code (MMX, AltiVec) override the
 * function pointers. NOTE(review): the function continues past this
 * chunk; the surrounding preprocessor guards for the arch inits are
 * presumably elided here. */
2596 SwsFunc ff_getSwsFunc(SwsContext *c)
2598 sws_init_swScale_c(c);
2601 ff_sws_init_swScale_mmx(c);
2603 ff_sws_init_swScale_altivec(c);