2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
23 supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
24 {BGR,RGB}{1,4,8,15,16} support dithering
26 unscaled special converters (YV12=I420=IYUV, Y800=Y8)
27 YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
32 BGR24 -> BGR32 & RGB24 -> RGB32
33 BGR32 -> BGR24 & RGB32 -> RGB24
38 tested special converters (most are tested actually, but I did not write it down ...)
45 untested special converters
46 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
47 YV12/I420 -> YV12/I420
48 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
49 BGR24 -> BGR32 & RGB24 -> RGB32
50 BGR32 -> BGR24 & RGB32 -> RGB24
61 #include "swscale_internal.h"
63 #include "libavutil/avassert.h"
64 #include "libavutil/intreadwrite.h"
65 #include "libavutil/cpu.h"
66 #include "libavutil/avutil.h"
67 #include "libavutil/mathematics.h"
68 #include "libavutil/bswap.h"
69 #include "libavutil/pixdesc.h"
// RGB -> YUV conversion coefficients in Q15 fixed point (RGB2YUV_SHIFT == 15).
// The 219/255 factor maps luma into the limited 16..235 range and 224/255
// maps chroma into 16..240; the base weights (0.299/0.587/0.114 etc.) match
// the ITU-R BT.601 matrix.  The "+0.5" rounds to nearest before the int cast.
72 #define RGB2YUV_SHIFT 15
73 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
74 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
75 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
76 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
77 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
78 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
79 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
80 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
81 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
88 more intelligent misalignment avoidance for the horizontal scaler
89 write special vertical cubic upscale version
90 optimize C code (YV12 / minmax)
91 add support for packed pixel YUV input & output
92 add support for Y8 output
93 optimize BGR24 & BGR32
94 add BGR4 output support
95 write special BGR->BGR scaler
// Ordered-dither bias tables used when rounding down to low-depth RGB/BGR
// output; the numeric suffix is the number of output levels the matrix
// covers.  Every row is 8 bytes wide (the 2x2/4x4 patterns are replicated
// across the row) so a full row can be fetched with one aligned 8-byte load.
// NOTE(review): the closing "};" lines of these tables are not visible in
// this extract -- presumably lost when the file was chunked.
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
// NOTE(review): the three tables below re-define dither_8x8_220 with
// gamma-corrected value sets (gamma 1.5, 2.0 and 2.5).  As plain duplicate
// definitions of the same identifier they cannot all compile; in upstream
// FFmpeg these alternatives are selected with #if/#elif preprocessor guards,
// which are not visible in this extract -- confirm against the original file.
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
// 8x8 ordered-dither matrix with even values 0..126 (mid-grey amplitude),
// and an 8-byte row of the constant 64.  ff_sws_pb_64 is exported
// (non-static) -- presumably consumed as a rounding bias by SIMD code in
// other files; not referenced in this extract.
185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
186 { 36, 68, 60, 92, 34, 66, 58, 90,},
187 { 100, 4,124, 28, 98, 2,122, 26,},
188 { 52, 84, 44, 76, 50, 82, 42, 74,},
189 { 116, 20,108, 12,114, 18,106, 10,},
190 { 32, 64, 56, 88, 38, 70, 62, 94,},
191 { 96, 0,120, 24,102, 6,126, 30,},
192 { 48, 80, 40, 72, 54, 86, 46, 78,},
193 { 112, 16,104, 8,118, 22,110, 14,},
195 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
196 { 64, 64, 64, 64, 64, 64, 64, 64 };
// dithers[n][y][x]: a family of eight 8x8 ordered-dither matrices of
// increasing amplitude (max value roughly 2^(n+1)-1 for table n).
// Presumably indexed by how many bits of precision are being discarded when
// down-converting -- TODO confirm against the code that selects a table.
// flat64 is a constant bias row of 64s (the 8-bit counterpart of
// ff_sws_pb_64 above).
198 DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
200 { 0, 1, 0, 1, 0, 1, 0, 1,},
201 { 1, 0, 1, 0, 1, 0, 1, 0,},
202 { 0, 1, 0, 1, 0, 1, 0, 1,},
203 { 1, 0, 1, 0, 1, 0, 1, 0,},
204 { 0, 1, 0, 1, 0, 1, 0, 1,},
205 { 1, 0, 1, 0, 1, 0, 1, 0,},
206 { 0, 1, 0, 1, 0, 1, 0, 1,},
207 { 1, 0, 1, 0, 1, 0, 1, 0,},
209 { 1, 2, 1, 2, 1, 2, 1, 2,},
210 { 3, 0, 3, 0, 3, 0, 3, 0,},
211 { 1, 2, 1, 2, 1, 2, 1, 2,},
212 { 3, 0, 3, 0, 3, 0, 3, 0,},
213 { 1, 2, 1, 2, 1, 2, 1, 2,},
214 { 3, 0, 3, 0, 3, 0, 3, 0,},
215 { 1, 2, 1, 2, 1, 2, 1, 2,},
216 { 3, 0, 3, 0, 3, 0, 3, 0,},
218 { 2, 4, 3, 5, 2, 4, 3, 5,},
219 { 6, 0, 7, 1, 6, 0, 7, 1,},
220 { 3, 5, 2, 4, 3, 5, 2, 4,},
221 { 7, 1, 6, 0, 7, 1, 6, 0,},
222 { 2, 4, 3, 5, 2, 4, 3, 5,},
223 { 6, 0, 7, 1, 6, 0, 7, 1,},
224 { 3, 5, 2, 4, 3, 5, 2, 4,},
225 { 7, 1, 6, 0, 7, 1, 6, 0,},
227 { 4, 8, 7, 11, 4, 8, 7, 11,},
228 { 12, 0, 15, 3, 12, 0, 15, 3,},
229 { 6, 10, 5, 9, 6, 10, 5, 9,},
230 { 14, 2, 13, 1, 14, 2, 13, 1,},
231 { 4, 8, 7, 11, 4, 8, 7, 11,},
232 { 12, 0, 15, 3, 12, 0, 15, 3,},
233 { 6, 10, 5, 9, 6, 10, 5, 9,},
234 { 14, 2, 13, 1, 14, 2, 13, 1,},
236 { 9, 17, 15, 23, 8, 16, 14, 22,},
237 { 25, 1, 31, 7, 24, 0, 30, 6,},
238 { 13, 21, 11, 19, 12, 20, 10, 18,},
239 { 29, 5, 27, 3, 28, 4, 26, 2,},
240 { 8, 16, 14, 22, 9, 17, 15, 23,},
241 { 24, 0, 30, 6, 25, 1, 31, 7,},
242 { 12, 20, 10, 18, 13, 21, 11, 19,},
243 { 28, 4, 26, 2, 29, 5, 27, 3,},
245 { 18, 34, 30, 46, 17, 33, 29, 45,},
246 { 50, 2, 62, 14, 49, 1, 61, 13,},
247 { 26, 42, 22, 38, 25, 41, 21, 37,},
248 { 58, 10, 54, 6, 57, 9, 53, 5,},
249 { 16, 32, 28, 44, 19, 35, 31, 47,},
250 { 48, 0, 60, 12, 51, 3, 63, 15,},
251 { 24, 40, 20, 36, 27, 43, 23, 39,},
252 { 56, 8, 52, 4, 59, 11, 55, 7,},
254 { 18, 34, 30, 46, 17, 33, 29, 45,},
255 { 50, 2, 62, 14, 49, 1, 61, 13,},
256 { 26, 42, 22, 38, 25, 41, 21, 37,},
257 { 58, 10, 54, 6, 57, 9, 53, 5,},
258 { 16, 32, 28, 44, 19, 35, 31, 47,},
259 { 48, 0, 60, 12, 51, 3, 63, 15,},
260 { 24, 40, 20, 36, 27, 43, 23, 39,},
261 { 56, 8, 52, 4, 59, 11, 55, 7,},
263 { 36, 68, 60, 92, 34, 66, 58, 90,},
264 { 100, 4,124, 28, 98, 2,122, 26,},
265 { 52, 84, 44, 76, 50, 82, 42, 74,},
266 { 116, 20,108, 12,114, 18,106, 10,},
267 { 32, 64, 56, 88, 38, 70, 62, 94,},
268 { 96, 0,120, 24,102, 6,126, 30,},
269 { 48, 80, 40, 72, 54, 86, 46, 78,},
270 { 112, 16,104, 8,118, 22,110, 14,},
273 static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
// Scaling constants for dithered bit-depth reduction, one row per output
// depth.  The exact meaning of the two indices is not derivable from this
// extract (the reading code is not visible) -- NOTE(review): verify the
// indexing semantics against the consumer before relying on this comment.
275 const uint16_t dither_scale[15][16]={
276 { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
277 { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
278 { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
279 { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
280 { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
281 { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
282 { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
283 { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
284 { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
285 { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
286 { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
287 { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
288 { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
289 { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
290 { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
// Vertical FIR filtering to high-depth (up to 16-bit) planar YUV(A).
// Applies the luma/chroma filters across the intermediate lines and stores
// each sample as a clipped 16-bit word, big- or little-endian per
// 'big_endian'.  'dword' is set for 16-bit output, where the intermediates
// really are int32 (extra precision); otherwise the pointers alias int16
// arrays and are re-cast per tap.  dest[] holds the Y,U,V,(A) planes; the
// alpha plane is filtered with the luma filter.  Each tap product is
// pre-shifted right by 1 so the 32-bit accumulator cannot overflow; 'shift'
// and the rounding seed below compensate for that lost bit.
293 static av_always_inline void
294 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
295 int lumFilterSize, const int16_t *chrFilter,
296 const int32_t **chrUSrc, const int32_t **chrVSrc,
297 int chrFilterSize, const int32_t **alpSrc,
298 uint16_t *dest[4], int dstW, int chrDstW,
299 int big_endian, int output_bits)
301 //FIXME Optimize (just quickly written not optimized..)
303 int dword= output_bits == 16;
304 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
305 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
// total right-shift applied before the store; "-1" pays back the per-tap >>1
306 int shift = 11 + 4*dword + 16 - output_bits - 1;
// store one clipped 16-bit sample with the requested endianness
308 #define output_pixel(pos, val) \
310 AV_WB16(pos, av_clip_uint16(val >> shift)); \
312 AV_WL16(pos, av_clip_uint16(val >> shift)); \
314 for (i = 0; i < dstW; i++) {
// rounding offset, centered for the final >> shift
315 int val = 1 << (26-output_bits + 4*dword - 1);
318 for (j = 0; j < lumFilterSize; j++)
319 val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
321 output_pixel(&yDest[i], val);
325 for (i = 0; i < chrDstW; i++) {
326 int u = 1 << (26-output_bits + 4*dword - 1);
327 int v = 1 << (26-output_bits + 4*dword - 1);
330 for (j = 0; j < chrFilterSize; j++) {
331 u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
332 v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
335 output_pixel(&uDest[i], u);
336 output_pixel(&vDest[i], v);
// alpha, when present, reuses the luma filter coefficients
340 if (CONFIG_SWSCALE_ALPHA && aDest) {
341 for (i = 0; i < dstW; i++) {
342 int val = 1 << (26-output_bits + 4*dword - 1);
345 for (j = 0; j < lumFilterSize; j++)
346 val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
348 output_pixel(&aDest[i], val);
// Vertical FIR filtering to 9/10-bit planar YUV(A): same structure as the
// 16-bit template above, but the intermediates are plain int16 and the
// result is clipped to exactly 'output_bits' significant bits with
// av_clip_uintp2() before the (endianness-selected) 16-bit store.
354 static av_always_inline void
355 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
356 int lumFilterSize, const int16_t *chrFilter,
357 const int16_t **chrUSrc, const int16_t **chrVSrc,
358 int chrFilterSize, const int16_t **alpSrc,
359 uint16_t *dest[4], int dstW, int chrDstW,
360 int big_endian, int output_bits)
362 //FIXME Optimize (just quickly written not optimized..)
364 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
365 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
// final right-shift; "-1" compensates the >>1 applied to every tap product
366 int shift = 11 + 16 - output_bits - 1;
// store one sample clipped to output_bits, big- or little-endian
368 #define output_pixel(pos, val) \
370 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
372 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
374 for (i = 0; i < dstW; i++) {
// rounding seed for the final >> shift
375 int val = 1 << (26-output_bits - 1);
378 for (j = 0; j < lumFilterSize; j++)
379 val += (lumSrc[j][i] * lumFilter[j]) >> 1;
381 output_pixel(&yDest[i], val);
385 for (i = 0; i < chrDstW; i++) {
386 int u = 1 << (26-output_bits - 1);
387 int v = 1 << (26-output_bits - 1);
390 for (j = 0; j < chrFilterSize; j++) {
391 u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
392 v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
395 output_pixel(&uDest[i], u);
396 output_pixel(&vDest[i], v);
// alpha is filtered with the luma coefficients
400 if (CONFIG_SWSCALE_ALPHA && aDest) {
401 for (i = 0; i < dstW; i++) {
402 int val = 1 << (26-output_bits - 1);
405 for (j = 0; j < lumFilterSize; j++)
406 val += (alpSrc[j][i] * lumFilter[j]) >> 1;
408 output_pixel(&aDest[i], val);
// Generates the exported yuv2yuvX{9,10,16}{BE,LE}_c entry points: each
// wrapper re-casts the generic int16_t** line-pointer arrays to the element
// type the template actually reads (int16_t for 9/10-bit, int32_t for
// 16-bit) and calls it with compile-time is_be/bits so the always-inline
// template is fully specialized per variant.
414 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
415 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
416 const int16_t **_lumSrc, int lumFilterSize, \
417 const int16_t *chrFilter, const int16_t **_chrUSrc, \
418 const int16_t **_chrVSrc, \
419 int chrFilterSize, const int16_t **_alpSrc, \
420 uint8_t *_dest[4], int dstW, int chrDstW) \
422 const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
423 **chrUSrc = (const typeX_t **) _chrUSrc, \
424 **chrVSrc = (const typeX_t **) _chrVSrc, \
425 **alpSrc = (const typeX_t **) _alpSrc; \
426 yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
427 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
428 alpSrc, (uint16_t **) _dest, \
429 dstW, chrDstW, is_be, bits); \
431 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
432 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
433 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
434 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
435 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
436 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
// Standard vertical FIR filtering to 8-bit planar YUV(A): seeds each
// accumulator with the per-position dither value (pre-shifted up by 12),
// sums the filter taps, then clips the >>19 result to 8 bits.  The alpha
// plane, when present, is filtered with the luma coefficients.
438 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
439 const int16_t **lumSrc, int lumFilterSize,
440 const int16_t *chrFilter, const int16_t **chrUSrc,
441 const int16_t **chrVSrc,
442 int chrFilterSize, const int16_t **alpSrc,
443 uint8_t *dest[4], int dstW, int chrDstW)
445 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
446 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
448 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
450 //FIXME Optimize (just quickly written not optimized..)
451 for (i=0; i<dstW; i++) {
452 int val = lumDither[i & 7] << 12;
454 for (j=0; j<lumFilterSize; j++)
455 val += lumSrc[j][i] * lumFilter[j];
457 yDest[i]= av_clip_uint8(val>>19);
461 for (i=0; i<chrDstW; i++) {
462 int u = chrDither[i & 7] << 12;
// V samples the dither row at a phase offset of 3 so U and V get
// decorrelated noise
463 int v = chrDither[(i + 3) & 7] << 12;
465 for (j=0; j<chrFilterSize; j++) {
466 u += chrUSrc[j][i] * chrFilter[j];
467 v += chrVSrc[j][i] * chrFilter[j];
470 uDest[i]= av_clip_uint8(u>>19);
471 vDest[i]= av_clip_uint8(v>>19);
474 if (CONFIG_SWSCALE_ALPHA && aDest)
475 for (i=0; i<dstW; i++) {
476 int val = lumDither[i & 7] << 12;
478 for (j=0; j<lumFilterSize; j++)
479 val += alpSrc[j][i] * lumFilter[j];
481 aDest[i]= av_clip_uint8(val>>19);
// Unscaled (single source line, no filter) 8-bit planar output: add the
// per-position dither bias, drop the 7 fractional bits of the intermediate
// format, clip to 8 bits.
485 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
486 const int16_t *chrUSrc, const int16_t *chrVSrc,
487 const int16_t *alpSrc,
488 uint8_t *dest[4], int dstW, int chrDstW)
490 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
491 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
493 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
495 for (i=0; i<dstW; i++) {
496 int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
497 yDest[i]= av_clip_uint8(val);
501 for (i=0; i<chrDstW; i++) {
502 int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
// V dither is phase-shifted by 3 relative to U (cf. yuv2yuvX_c)
503 int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
504 uDest[i]= av_clip_uint8(u);
505 vDest[i]= av_clip_uint8(v);
508 if (CONFIG_SWSCALE_ALPHA && aDest)
509 for (i=0; i<dstW; i++) {
510 int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
511 aDest[i]= av_clip_uint8(val);
// Vertical FIR filtering to the semi-planar NV12/NV21 formats: luma is
// produced exactly as in yuv2yuvX_c; chroma is interleaved into the single
// dest[1] plane -- U,V,U,V... for NV12 and V,U,V,U... otherwise (NV21).
515 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
516 const int16_t **lumSrc, int lumFilterSize,
517 const int16_t *chrFilter, const int16_t **chrUSrc,
518 const int16_t **chrVSrc, int chrFilterSize,
519 const int16_t **alpSrc, uint8_t *dest[4],
520 int dstW, int chrDstW)
522 uint8_t *yDest = dest[0], *uDest = dest[1];
523 enum PixelFormat dstFormat = c->dstFormat;
524 const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
526 //FIXME Optimize (just quickly written not optimized..)
528 for (i=0; i<dstW; i++) {
529 int val = lumDither[i & 7] << 12;
531 for (j=0; j<lumFilterSize; j++)
532 val += lumSrc[j][i] * lumFilter[j];
534 yDest[i]= av_clip_uint8(val>>19);
// NV12: U goes in the even bytes, V in the odd ones
540 if (dstFormat == PIX_FMT_NV12)
541 for (i=0; i<chrDstW; i++) {
542 int u = chrDither[i & 7] << 12;
543 int v = chrDither[(i + 3) & 7] << 12;
545 for (j=0; j<chrFilterSize; j++) {
546 u += chrUSrc[j][i] * chrFilter[j];
547 v += chrVSrc[j][i] * chrFilter[j];
550 uDest[2*i]= av_clip_uint8(u>>19);
551 uDest[2*i+1]= av_clip_uint8(v>>19);
// NV21: same computation with U and V stores swapped
554 for (i=0; i<chrDstW; i++) {
555 int u = chrDither[i & 7] << 12;
556 int v = chrDither[(i + 3) & 7] << 12;
558 for (j=0; j<chrFilterSize; j++) {
559 u += chrUSrc[j][i] * chrFilter[j];
560 v += chrVSrc[j][i] * chrFilter[j];
563 uDest[2*i]= av_clip_uint8(v>>19);
564 uDest[2*i+1]= av_clip_uint8(u>>19);
// NOTE(review): this output_pixel macro is truncated in this extract --
// only the GRAY16BE branch header is visible; the BE/LE store bodies are
// missing.
568 #define output_pixel(pos, val) \
569 if (target == PIX_FMT_GRAY16BE) { \
// Full vertical FIR filter to 16-bit grayscale; sources are the 32-bit
// intermediates, two output pixels per loop iteration.  Chroma/alpha
// arguments are accepted for signature compatibility but unused here.
575 static av_always_inline void
576 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
577 const int32_t **lumSrc, int lumFilterSize,
578 const int16_t *chrFilter, const int32_t **chrUSrc,
579 const int32_t **chrVSrc, int chrFilterSize,
580 const int32_t **alpSrc, uint16_t *dest, int dstW,
581 int y, enum PixelFormat target)
585 for (i = 0; i < (dstW >> 1); i++) {
590 for (j = 0; j < lumFilterSize; j++) {
591 Y1 += lumSrc[j][i * 2] * lumFilter[j];
592 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
// fast path: clip only when a value actually left the 16-bit range
596 if ((Y1 | Y2) & 0x10000) {
597 Y1 = av_clip_uint16(Y1);
598 Y2 = av_clip_uint16(Y2);
600 output_pixel(&dest[i * 2 + 0], Y1);
601 output_pixel(&dest[i * 2 + 1], Y2);
// Two-line bilinear blend to 16-bit grayscale: mixes buf[0] and buf[1]
// with the 12-bit yalpha weight (yalpha1 = 4095 - yalpha) and drops 15
// fractional bits before the endian-aware store.
605 static av_always_inline void
606 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
607 const int32_t *ubuf[2], const int32_t *vbuf[2],
608 const int32_t *abuf[2], uint16_t *dest, int dstW,
609 int yalpha, int uvalpha, int y,
610 enum PixelFormat target)
612 int yalpha1 = 4095 - yalpha;
614 const int32_t *buf0 = buf[0], *buf1 = buf[1];
616 for (i = 0; i < (dstW >> 1); i++) {
617 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
618 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
620 output_pixel(&dest[i * 2 + 0], Y1);
621 output_pixel(&dest[i * 2 + 1], Y2);
// Single-line (unscaled) 16-bit grayscale output: round to nearest (+4)
// and drop the 3 remaining fractional bits of the intermediates.
625 static av_always_inline void
626 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
627 const int32_t *ubuf[2], const int32_t *vbuf[2],
628 const int32_t *abuf0, uint16_t *dest, int dstW,
629 int uvalpha, int y, enum PixelFormat target)
633 for (i = 0; i < (dstW >> 1); i++) {
634 int Y1 = (buf0[i * 2 ]+4)>>3;
635 int Y2 = (buf0[i * 2 + 1]+4)>>3;
637 output_pixel(&dest[i * 2 + 0], Y1);
638 output_pixel(&dest[i * 2 + 1], Y2);
// Generates the three public entry points (_X full filter, _2 two-line
// blend, _1 single line) for one 16-bit-per-component packed output format:
// each wrapper re-casts the generic int16_t pointers to the int32_t
// intermediates the templates expect and forwards the compile-time 'fmt'
// so the always-inline template specializes per format.
644 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
645 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
646 const int16_t **_lumSrc, int lumFilterSize, \
647 const int16_t *chrFilter, const int16_t **_chrUSrc, \
648 const int16_t **_chrVSrc, int chrFilterSize, \
649 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
652 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
653 **chrUSrc = (const int32_t **) _chrUSrc, \
654 **chrVSrc = (const int32_t **) _chrVSrc, \
655 **alpSrc = (const int32_t **) _alpSrc; \
656 uint16_t *dest = (uint16_t *) _dest; \
657 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
658 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
659 alpSrc, dest, dstW, y, fmt); \
662 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
663 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
664 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
665 int yalpha, int uvalpha, int y) \
667 const int32_t **buf = (const int32_t **) _buf, \
668 **ubuf = (const int32_t **) _ubuf, \
669 **vbuf = (const int32_t **) _vbuf, \
670 **abuf = (const int32_t **) _abuf; \
671 uint16_t *dest = (uint16_t *) _dest; \
672 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
673 dest, dstW, yalpha, uvalpha, y, fmt); \
676 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
677 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
678 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
679 int uvalpha, int y) \
681 const int32_t *buf0 = (const int32_t *) _buf0, \
682 **ubuf = (const int32_t **) _ubuf, \
683 **vbuf = (const int32_t **) _vbuf, \
684 *abuf0 = (const int32_t *) _abuf0; \
685 uint16_t *dest = (uint16_t *) _dest; \
686 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
687 dstW, uvalpha, y, fmt); \
// gray16 output in both endiannesses
690 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
691 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
// NOTE(review): truncated macro -- only the MONOBLACK branch header is
// visible in this extract; the MONOWHITE/byte-store bodies are missing.
693 #define output_pixel(pos, acc) \
694 if (target == PIX_FMT_MONOBLACK) { \
// Full vertical FIR filter to 1 bpp monochrome: filtered luma is offset by
// the dither_8x8_220 row for this output line and looked up in 'g'
// (presumably the grey output table at neutral U=V=128 chroma -- verify
// against the yuv2rgb table setup), then bits are packed MSB-first, one
// pixel per bit, via the 'acc += acc + bit' idiom.
700 static av_always_inline void
701 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
702 const int16_t **lumSrc, int lumFilterSize,
703 const int16_t *chrFilter, const int16_t **chrUSrc,
704 const int16_t **chrVSrc, int chrFilterSize,
705 const int16_t **alpSrc, uint8_t *dest, int dstW,
706 int y, enum PixelFormat target)
708 const uint8_t * const d128=dither_8x8_220[y&7];
709 uint8_t *g = c->table_gU[128] + c->table_gV[128];
713 for (i = 0; i < dstW - 1; i += 2) {
718 for (j = 0; j < lumFilterSize; j++) {
719 Y1 += lumSrc[j][i] * lumFilter[j];
720 Y2 += lumSrc[j][i+1] * lumFilter[j];
// clip only when a value actually overflowed the 8-bit range
724 if ((Y1 | Y2) & 0x100) {
725 Y1 = av_clip_uint8(Y1);
726 Y2 = av_clip_uint8(Y2);
728 acc += acc + g[Y1 + d128[(i + 0) & 7]];
729 acc += acc + g[Y2 + d128[(i + 1) & 7]];
731 output_pixel(*dest++, acc);
// Two-line bilinear blend to 1 bpp monochrome: blends buf0/buf1 with the
// 12-bit yalpha weight, dithers against dither_8x8_220, and packs 8 pixels
// per output byte MSB-first (fully unrolled).
736 static av_always_inline void
737 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
738 const int16_t *ubuf[2], const int16_t *vbuf[2],
739 const int16_t *abuf[2], uint8_t *dest, int dstW,
740 int yalpha, int uvalpha, int y,
741 enum PixelFormat target)
743 const int16_t *buf0 = buf[0], *buf1 = buf[1];
744 const uint8_t * const d128 = dither_8x8_220[y & 7];
745 uint8_t *g = c->table_gU[128] + c->table_gV[128];
746 int yalpha1 = 4095 - yalpha;
749 for (i = 0; i < dstW - 7; i += 8) {
750 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
751 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
752 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
753 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
754 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
755 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
756 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
757 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
758 output_pixel(*dest++, acc);
// Single-line (unscaled) 1 bpp monochrome output: drop the 7 fractional
// bits of the luma intermediates, dither, and pack 8 pixels per byte
// MSB-first (fully unrolled).
762 static av_always_inline void
763 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
764 const int16_t *ubuf[2], const int16_t *vbuf[2],
765 const int16_t *abuf0, uint8_t *dest, int dstW,
766 int uvalpha, int y, enum PixelFormat target)
768 const uint8_t * const d128 = dither_8x8_220[y & 7];
769 uint8_t *g = c->table_gU[128] + c->table_gV[128];
772 for (i = 0; i < dstW - 7; i += 8) {
773 int acc = g[(buf0[i ] >> 7) + d128[0]];
774 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
775 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
776 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
777 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
778 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
779 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
780 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
781 output_pixel(*dest++, acc);
// Same generator as YUV2PACKED16WRAPPER but for templates that work on the
// plain int16 intermediates: emits the _X/_2/_1 entry points for one packed
// output format, forwarding the compile-time 'fmt'.
787 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
788 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
789 const int16_t **lumSrc, int lumFilterSize, \
790 const int16_t *chrFilter, const int16_t **chrUSrc, \
791 const int16_t **chrVSrc, int chrFilterSize, \
792 const int16_t **alpSrc, uint8_t *dest, int dstW, \
795 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
796 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
797 alpSrc, dest, dstW, y, fmt); \
800 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
801 const int16_t *ubuf[2], const int16_t *vbuf[2], \
802 const int16_t *abuf[2], uint8_t *dest, int dstW, \
803 int yalpha, int uvalpha, int y) \
805 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
806 dest, dstW, yalpha, uvalpha, y, fmt); \
809 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
810 const int16_t *ubuf[2], const int16_t *vbuf[2], \
811 const int16_t *abuf0, uint8_t *dest, int dstW, \
812 int uvalpha, int y) \
814 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
815 abuf0, dest, dstW, uvalpha, \
// monochrome output, white- and black-is-zero variants
819 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
820 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
// Store one Y1,U,Y2,V quad: YUYV byte order for PIX_FMT_YUYV422, otherwise
// UYVY.  NOTE(review): the U/V store lines of this macro are not visible
// in this extract.
822 #define output_pixels(pos, Y1, U, Y2, V) \
823 if (target == PIX_FMT_YUYV422) { \
824 dest[pos + 0] = Y1; \
826 dest[pos + 2] = Y2; \
830 dest[pos + 1] = Y1; \
832 dest[pos + 3] = Y2; \
// Full vertical FIR filter to packed 4:2:2: two luma samples plus one
// shared U/V pair per loop iteration; clipping only happens when a value
// actually overflowed the 8-bit range.
835 static av_always_inline void
836 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
837 const int16_t **lumSrc, int lumFilterSize,
838 const int16_t *chrFilter, const int16_t **chrUSrc,
839 const int16_t **chrVSrc, int chrFilterSize,
840 const int16_t **alpSrc, uint8_t *dest, int dstW,
841 int y, enum PixelFormat target)
845 for (i = 0; i < (dstW >> 1); i++) {
852 for (j = 0; j < lumFilterSize; j++) {
853 Y1 += lumSrc[j][i * 2] * lumFilter[j];
854 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
856 for (j = 0; j < chrFilterSize; j++) {
857 U += chrUSrc[j][i] * chrFilter[j];
858 V += chrVSrc[j][i] * chrFilter[j];
// fast path: skip the four clips when every value is already in range
864 if ((Y1 | Y2 | U | V) & 0x100) {
865 Y1 = av_clip_uint8(Y1);
866 Y2 = av_clip_uint8(Y2);
867 U = av_clip_uint8(U);
868 V = av_clip_uint8(V);
870 output_pixels(4*i, Y1, U, Y2, V);
// Two-line bilinear blend to packed 4:2:2: luma mixed with the 12-bit
// yalpha weight, chroma with the independent uvalpha weight; both drop 19
// fractional bits before the interleaved store.
874 static av_always_inline void
875 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
876 const int16_t *ubuf[2], const int16_t *vbuf[2],
877 const int16_t *abuf[2], uint8_t *dest, int dstW,
878 int yalpha, int uvalpha, int y,
879 enum PixelFormat target)
881 const int16_t *buf0 = buf[0], *buf1 = buf[1],
882 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
883 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
884 int yalpha1 = 4095 - yalpha;
885 int uvalpha1 = 4095 - uvalpha;
888 for (i = 0; i < (dstW >> 1); i++) {
889 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
890 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
891 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
892 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
894 output_pixels(i * 4, Y1, U, Y2, V);
// Single-line packed 4:2:2 output.  When uvalpha < 2048 chroma is taken
// from one line only (>>7); otherwise the two chroma lines are averaged
// (sum >> 8).
// NOTE(review): the no-blend branch reads ubuf1/vbuf1 while luma comes from
// buf0; later FFmpeg revisions read ubuf0/vbuf0 here -- verify which chroma
// line corresponds to the current output line in this revision.
898 static av_always_inline void
899 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
900 const int16_t *ubuf[2], const int16_t *vbuf[2],
901 const int16_t *abuf0, uint8_t *dest, int dstW,
902 int uvalpha, int y, enum PixelFormat target)
904 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
905 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
908 if (uvalpha < 2048) {
909 for (i = 0; i < (dstW >> 1); i++) {
910 int Y1 = buf0[i * 2] >> 7;
911 int Y2 = buf0[i * 2 + 1] >> 7;
912 int U = ubuf1[i] >> 7;
913 int V = vbuf1[i] >> 7;
915 output_pixels(i * 4, Y1, U, Y2, V);
918 for (i = 0; i < (dstW >> 1); i++) {
919 int Y1 = buf0[i * 2] >> 7;
920 int Y2 = buf0[i * 2 + 1] >> 7;
921 int U = (ubuf0[i] + ubuf1[i]) >> 8;
922 int V = (vbuf0[i] + vbuf1[i]) >> 8;
924 output_pixels(i * 4, Y1, U, Y2, V);
// packed 4:2:2 in both byte orders
931 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
932 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
// R_B/B_R swap the red and blue store positions so one template serves
// both the RGB48 and BGR48 formats.  NOTE(review): the output_pixel macro
// body (the endian-dependent 16-bit store) is truncated in this extract.
934 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
935 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
936 #define output_pixel(pos, val) \
937 if (isBE(target)) { \
// Full vertical FIR filter to 48-bit RGB/BGR: filters the 32-bit
// intermediates, converts via the context's yuv2rgb coefficients, and
// stores three 16-bit components per pixel, clipped to 30 bits then >>14.
// NOTE(review): "-128 << 23" left-shifts a negative constant, which is
// formally undefined in C; this relies on two's-complement shift behavior.
943 static av_always_inline void
944 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
945 const int32_t **lumSrc, int lumFilterSize,
946 const int16_t *chrFilter, const int32_t **chrUSrc,
947 const int32_t **chrVSrc, int chrFilterSize,
948 const int32_t **alpSrc, uint16_t *dest, int dstW,
949 int y, enum PixelFormat target)
953 for (i = 0; i < (dstW >> 1); i++) {
957 int U = -128 << 23; // 19
961 for (j = 0; j < lumFilterSize; j++) {
962 Y1 += lumSrc[j][i * 2] * lumFilter[j];
963 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
965 for (j = 0; j < chrFilterSize; j++) {
966 U += chrUSrc[j][i] * chrFilter[j];
967 V += chrVSrc[j][i] * chrFilter[j];
970 // 8bit: 12+15=27; 16-bit: 12+19=31
976 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
977 Y1 -= c->yuv2rgb_y_offset;
978 Y2 -= c->yuv2rgb_y_offset;
979 Y1 *= c->yuv2rgb_y_coeff;
980 Y2 *= c->yuv2rgb_y_coeff;
983 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
985 R = V * c->yuv2rgb_v2r_coeff;
986 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
987 B = U * c->yuv2rgb_u2b_coeff;
989 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
990 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
991 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
992 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
993 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
994 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
995 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
// Two-line bilinear blend to 48-bit RGB/BGR: mixes the two source lines
// with yalpha/uvalpha weights, centers chroma with the -128<<23 bias, then
// converts and stores exactly as the _X variant above.
1000 static av_always_inline void
1001 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
1002 const int32_t *ubuf[2], const int32_t *vbuf[2],
1003 const int32_t *abuf[2], uint16_t *dest, int dstW,
1004 int yalpha, int uvalpha, int y,
1005 enum PixelFormat target)
1007 const int32_t *buf0 = buf[0], *buf1 = buf[1],
1008 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1009 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1010 int yalpha1 = 4095 - yalpha;
1011 int uvalpha1 = 4095 - uvalpha;
1014 for (i = 0; i < (dstW >> 1); i++) {
1015 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
1016 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
1017 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
1018 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
1021 Y1 -= c->yuv2rgb_y_offset;
1022 Y2 -= c->yuv2rgb_y_offset;
1023 Y1 *= c->yuv2rgb_y_coeff;
1024 Y2 *= c->yuv2rgb_y_coeff;
1028 R = V * c->yuv2rgb_v2r_coeff;
1029 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1030 B = U * c->yuv2rgb_u2b_coeff;
1032 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1033 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1034 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1035 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1036 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1037 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Convert one line of 16-bit planar YUV (int32 intermediates) to packed
 * 48-bit RGB, unscaled path (single luma source line).  uvalpha < 2048
 * selects the "use one chroma line" branch; otherwise two chroma lines are
 * averaged.  output_pixel/R_B/B_R are macros defined around the template
 * (not visible in this chunk).
 * NOTE(review): some interior lines (braces, declarations) appear elided in
 * this copy — code kept byte-identical. */
1042 static av_always_inline void
1043 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
1044 const int32_t *ubuf[2], const int32_t *vbuf[2],
1045 const int32_t *abuf0, uint16_t *dest, int dstW,
1046 int uvalpha, int y, enum PixelFormat target)
1048 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1049 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* No chroma interpolation: chroma taken from a single line. */
1052 if (uvalpha < 2048) {
1053 for (i = 0; i < (dstW >> 1); i++) {
1054 int Y1 = (buf0[i * 2] ) >> 2;
1055 int Y2 = (buf0[i * 2 + 1]) >> 2;
1056 int U = (ubuf0[i] + (-128 << 11)) >> 2;
1057 int V = (vbuf0[i] + (-128 << 11)) >> 2;
1060 Y1 -= c->yuv2rgb_y_offset;
1061 Y2 -= c->yuv2rgb_y_offset;
1062 Y1 *= c->yuv2rgb_y_coeff;
1063 Y2 *= c->yuv2rgb_y_coeff;
1067 R = V * c->yuv2rgb_v2r_coeff;
1068 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1069 B = U * c->yuv2rgb_u2b_coeff;
/* Clip to 30 bits then drop the fixed-point fraction (14 bits). */
1071 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1072 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1073 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1074 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1075 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1076 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Chroma interpolation: average the two chroma lines. */
1080 for (i = 0; i < (dstW >> 1); i++) {
1081 int Y1 = (buf0[i * 2] ) >> 2;
1082 int Y2 = (buf0[i * 2 + 1]) >> 2;
1083 int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
1084 int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
1087 Y1 -= c->yuv2rgb_y_offset;
1088 Y2 -= c->yuv2rgb_y_offset;
1089 Y1 *= c->yuv2rgb_y_coeff;
1090 Y2 *= c->yuv2rgb_y_coeff;
1094 R = V * c->yuv2rgb_v2r_coeff;
1095 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1096 B = U * c->yuv2rgb_u2b_coeff;
1098 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
1099 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
1100 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
1101 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
1102 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
1103 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
/* Instantiate the _1/_2/_X output wrappers for 48-bit RGB/BGR in both
 * endiannesses from the yuv2rgb48 templates above. */
1113 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
1114 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
1115 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
1116 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
/* Write two horizontally adjacent output pixels (Y1/Y2 share one U/V pair)
 * for any packed RGB/BGR target, using the per-component lookup tables
 * _r/_g/_b prepared by the yuv2rgb init code.  The `target` parameter is a
 * compile-time constant in each instantiation, so the format branches fold
 * away.  y is the output line number, used to select dither patterns.
 * NOTE(review): some interior lines (braces, else arms) appear elided in
 * this copy — code kept byte-identical. */
1118 static av_always_inline void
1119 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1120 int U, int V, int A1, int A2,
1121 const void *_r, const void *_g, const void *_b, int y,
1122 enum PixelFormat target, int hasAlpha)
/* ---- 32-bit-per-pixel targets: table entries are pre-shifted uint32. ---- */
1124 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
1125 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
1126 uint32_t *dest = (uint32_t *) _dest;
1127 const uint32_t *r = (const uint32_t *) _r;
1128 const uint32_t *g = (const uint32_t *) _g;
1129 const uint32_t *b = (const uint32_t *) _b;
/* Alpha goes in byte 0 (for *_1 layouts) or byte 3 (shift 24). */
1132 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1134 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1135 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1138 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
1140 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1141 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1143 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1144 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
/* ---- 24-bit packed targets: write 3 bytes per pixel. ---- */
1147 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
1148 uint8_t *dest = (uint8_t *) _dest;
1149 const uint8_t *r = (const uint8_t *) _r;
1150 const uint8_t *g = (const uint8_t *) _g;
1151 const uint8_t *b = (const uint8_t *) _b;
1153 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
1154 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
1156 dest[i * 6 + 0] = r_b[Y1];
1157 dest[i * 6 + 1] = g[Y1];
1158 dest[i * 6 + 2] = b_r[Y1];
1159 dest[i * 6 + 3] = r_b[Y2];
1160 dest[i * 6 + 4] = g[Y2];
1161 dest[i * 6 + 5] = b_r[Y2];
/* ---- 16/15/12-bit targets: ordered dither via table-index offsets. ---- */
1164 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1165 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1166 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1167 uint16_t *dest = (uint16_t *) _dest;
1168 const uint16_t *r = (const uint16_t *) _r;
1169 const uint16_t *g = (const uint16_t *) _g;
1170 const uint16_t *b = (const uint16_t *) _b;
1171 int dr1, dg1, db1, dr2, dg2, db2;
1173 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1174 dr1 = dither_2x2_8[ y & 1 ][0];
1175 dg1 = dither_2x2_4[ y & 1 ][0];
1176 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1177 dr2 = dither_2x2_8[ y & 1 ][1];
1178 dg2 = dither_2x2_4[ y & 1 ][1];
1179 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1180 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1181 dr1 = dither_2x2_8[ y & 1 ][0];
1182 dg1 = dither_2x2_8[ y & 1 ][1];
1183 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1184 dr2 = dither_2x2_8[ y & 1 ][1];
1185 dg2 = dither_2x2_8[ y & 1 ][0];
1186 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1188 dr1 = dither_4x4_16[ y & 3 ][0];
1189 dg1 = dither_4x4_16[ y & 3 ][1];
1190 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1191 dr2 = dither_4x4_16[ y & 3 ][1];
1192 dg2 = dither_4x4_16[ y & 3 ][0];
1193 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1196 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1197 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* ---- 8/4-bit targets: heavier 8x8 ordered dither. ---- */
1198 } else /* 8/4-bit */ {
1199 uint8_t *dest = (uint8_t *) _dest;
1200 const uint8_t *r = (const uint8_t *) _r;
1201 const uint8_t *g = (const uint8_t *) _g;
1202 const uint8_t *b = (const uint8_t *) _b;
1203 int dr1, dg1, db1, dr2, dg2, db2;
1205 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1206 const uint8_t * const d64 = dither_8x8_73[y & 7];
1207 const uint8_t * const d32 = dither_8x8_32[y & 7];
1208 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1209 db1 = d64[(i * 2 + 0) & 7];
1210 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1211 db2 = d64[(i * 2 + 1) & 7];
1213 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1214 const uint8_t * const d128 = dither_8x8_220[y & 7];
1215 dr1 = db1 = d128[(i * 2 + 0) & 7];
1216 dg1 = d64[(i * 2 + 0) & 7];
1217 dr2 = db2 = d128[(i * 2 + 1) & 7];
1218 dg2 = d64[(i * 2 + 1) & 7];
/* RGB4/BGR4 pack two 4-bit pixels into one byte. */
1221 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1222 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1223 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1225 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1226 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/* Vertically scaled YUV -> packed RGB: apply the vertical FIR filters
 * (lumFilter/chrFilter over lumFilterSize/chrFilterSize source lines) for
 * each pair of output pixels, clip to 8 bits, then emit via yuv2rgb_write.
 * NOTE(review): accumulator initializations and some braces appear elided
 * in this copy — code kept byte-identical. */
1231 static av_always_inline void
1232 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1233 const int16_t **lumSrc, int lumFilterSize,
1234 const int16_t *chrFilter, const int16_t **chrUSrc,
1235 const int16_t **chrVSrc, int chrFilterSize,
1236 const int16_t **alpSrc, uint8_t *dest, int dstW,
1237 int y, enum PixelFormat target, int hasAlpha)
1241 for (i = 0; i < (dstW >> 1); i++) {
1247 int av_unused A1, A2;
1248 const void *r, *g, *b;
1250 for (j = 0; j < lumFilterSize; j++) {
1251 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1252 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1254 for (j = 0; j < chrFilterSize; j++) {
1255 U += chrUSrc[j][i] * chrFilter[j];
1256 V += chrVSrc[j][i] * chrFilter[j];
/* Only clip when some value actually overflowed 8 bits. */
1262 if ((Y1 | Y2 | U | V) & 0x100) {
1263 Y1 = av_clip_uint8(Y1);
1264 Y2 = av_clip_uint8(Y2);
1265 U = av_clip_uint8(U);
1266 V = av_clip_uint8(V);
1271 for (j = 0; j < lumFilterSize; j++) {
1272 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1273 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1277 if ((A1 | A2) & 0x100) {
1278 A1 = av_clip_uint8(A1);
1279 A2 = av_clip_uint8(A2);
1283 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1285 g = (c->table_gU[U] + c->table_gV[V]);
1288 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1289 r, g, b, y, target, hasAlpha);
/* Bilinear (2-line) vertical scaling YUV -> packed RGB: blend two source
 * lines with weights yalpha/uvalpha (12-bit, 0..4095), shift down to 8 bits
 * (>> 19 on 15-bit intermediates), then emit via yuv2rgb_write. */
1293 static av_always_inline void
1294 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1295 const int16_t *ubuf[2], const int16_t *vbuf[2],
1296 const int16_t *abuf[2], uint8_t *dest, int dstW,
1297 int yalpha, int uvalpha, int y,
1298 enum PixelFormat target, int hasAlpha)
1300 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1301 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1302 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1303 *abuf0 = hasAlpha ? abuf[0] : NULL,
1304 *abuf1 = hasAlpha ? abuf[1] : NULL;
1305 int yalpha1 = 4095 - yalpha;
1306 int uvalpha1 = 4095 - uvalpha;
1309 for (i = 0; i < (dstW >> 1); i++) {
1310 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1311 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1312 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1313 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
1315 const void *r = c->table_rV[V],
1316 *g = (c->table_gU[U] + c->table_gV[V]),
1317 *b = c->table_bU[U];
1320 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1321 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1324 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1325 r, g, b, y, target, hasAlpha);
/* Unscaled (1-line) YUV -> packed RGB: shift the 15-bit intermediates down
 * to 8 bits; uvalpha < 2048 takes chroma from one line, otherwise the two
 * chroma lines are averaged.
 * NOTE(review): U/V in the first branch read ubuf1/vbuf1 while the averaged
 * branch reads ubuf0+ubuf1 — looks intentional per upstream but verify. */
1329 static av_always_inline void
1330 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1331 const int16_t *ubuf[2], const int16_t *vbuf[2],
1332 const int16_t *abuf0, uint8_t *dest, int dstW,
1333 int uvalpha, int y, enum PixelFormat target,
1336 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1337 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1340 if (uvalpha < 2048) {
1341 for (i = 0; i < (dstW >> 1); i++) {
1342 int Y1 = buf0[i * 2] >> 7;
1343 int Y2 = buf0[i * 2 + 1] >> 7;
1344 int U = ubuf1[i] >> 7;
1345 int V = vbuf1[i] >> 7;
1347 const void *r = c->table_rV[V],
1348 *g = (c->table_gU[U] + c->table_gV[V]),
1349 *b = c->table_bU[U];
1352 A1 = abuf0[i * 2 ] >> 7;
1353 A2 = abuf0[i * 2 + 1] >> 7;
1356 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1357 r, g, b, y, target, hasAlpha);
1360 for (i = 0; i < (dstW >> 1); i++) {
1361 int Y1 = buf0[i * 2] >> 7;
1362 int Y2 = buf0[i * 2 + 1] >> 7;
1363 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1364 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1366 const void *r = c->table_rV[V],
1367 *g = (c->table_gU[U] + c->table_gV[V]),
1368 *b = c->table_bU[U];
1371 A1 = abuf0[i * 2 ] >> 7;
1372 A2 = abuf0[i * 2 + 1] >> 7;
1375 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1376 r, g, b, y, target, hasAlpha);
/* YUV2RGBWRAPPERX emits the _X (multi-tap vertical filter) entry point for
 * one output format; YUV2RGBWRAPPER additionally emits the _2 (bilinear)
 * and _1 (unscaled) entry points, all forwarding to the templates above
 * with `fmt`/`hasAlpha` baked in as compile-time constants. */
1381 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1382 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1383 const int16_t **lumSrc, int lumFilterSize, \
1384 const int16_t *chrFilter, const int16_t **chrUSrc, \
1385 const int16_t **chrVSrc, int chrFilterSize, \
1386 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1389 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1390 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1391 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1393 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1394 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1395 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1396 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1397 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1398 int yalpha, int uvalpha, int y) \
1400 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1401 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1404 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1405 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1406 const int16_t *abuf0, uint8_t *dest, int dstW, \
1407 int uvalpha, int y) \
1409 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1410 dstW, uvalpha, y, fmt, hasAlpha); \
/* Instantiate output functions for all packed RGB depths.  The 32-bit
 * variants come in three flavors: runtime alpha check (c->alpPixBuf),
 * forced alpha (a32*), and forced no-alpha (x32*). */
1414 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1415 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1417 #if CONFIG_SWSCALE_ALPHA
1418 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1419 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1421 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1422 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1424 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1425 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1426 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1427 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1428 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1429 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1430 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1431 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/* Full-chroma-resolution YUV -> RGB conversion (one independent U/V sample
 * per output pixel, no table lookup): computes R/G/B per pixel with the
 * fixed-point coefficients from SwsContext, clipping only on overflow.
 * NOTE(review): per-pixel output stores between the dest[0]/dest[3] lines
 * appear elided in this copy — code kept byte-identical. */
1433 static av_always_inline void
1434 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1435 const int16_t **lumSrc, int lumFilterSize,
1436 const int16_t *chrFilter, const int16_t **chrUSrc,
1437 const int16_t **chrVSrc, int chrFilterSize,
1438 const int16_t **alpSrc, uint8_t *dest,
1439 int dstW, int y, enum PixelFormat target, int hasAlpha)
1442 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1444 for (i = 0; i < dstW; i++) {
/* Rounding bias plus the -128 chroma offset in 19-bit fixed point. */
1447 int U = (1<<9)-(128 << 19);
1448 int V = (1<<9)-(128 << 19);
1452 for (j = 0; j < lumFilterSize; j++) {
1453 Y += lumSrc[j][i] * lumFilter[j];
1455 for (j = 0; j < chrFilterSize; j++) {
1456 U += chrUSrc[j][i] * chrFilter[j];
1457 V += chrVSrc[j][i] * chrFilter[j];
1464 for (j = 0; j < lumFilterSize; j++) {
1465 A += alpSrc[j][i] * lumFilter[j];
1469 A = av_clip_uint8(A);
1471 Y -= c->yuv2rgb_y_offset;
1472 Y *= c->yuv2rgb_y_coeff;
1474 R = Y + V*c->yuv2rgb_v2r_coeff;
1475 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1476 B = Y + U*c->yuv2rgb_u2b_coeff;
1477 if ((R | G | B) & 0xC0000000) {
1478 R = av_clip_uintp2(R, 30);
1479 G = av_clip_uintp2(G, 30);
1480 B = av_clip_uintp2(B, 30);
1485 dest[0] = hasAlpha ? A : 255;
1499 dest[3] = hasAlpha ? A : 255;
1502 dest[0] = hasAlpha ? A : 255;
1516 dest[3] = hasAlpha ? A : 255;
/* Instantiate the full-chroma _X output functions for the 32-bit and
 * 24-bit packed formats (runtime-alpha, forced-alpha, no-alpha flavors). */
1524 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1525 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1526 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1527 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1529 #if CONFIG_SWSCALE_ALPHA
1530 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1531 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1532 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1533 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1535 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1536 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1537 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1538 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1540 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1541 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
1543 static av_always_inline void fillPlane(uint8_t* plane, int stride,
1544 int width, int height,
1548 uint8_t *ptr = plane + stride*y;
1549 for (i=0; i<height; i++) {
1550 memset(ptr, val, width);
/* Helpers for the rgb48 readers below: input_pixel reads one 16-bit
 * component honoring the origin format's endianness; r/b swap roles when
 * the origin is BGR48 so the same template handles both channel orders. */
1555 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1557 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1558 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/* Convert one line of packed 48-bit RGB/BGR to 16-bit luma using the
 * fixed-point RY/GY/BY coefficients; rounding constant keeps limited range. */
1560 static av_always_inline void
1561 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1562 enum PixelFormat origin)
1565 for (i = 0; i < width; i++) {
1566 unsigned int r_b = input_pixel(&src[i*3+0]);
1567 unsigned int g = input_pixel(&src[i*3+1]);
1568 unsigned int b_r = input_pixel(&src[i*3+2]);
1570 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Convert one line of packed 48-bit RGB/BGR to 16-bit chroma (full width,
 * one U/V per input pixel); src2 is unused here. */
1574 static av_always_inline void
1575 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1576 const uint16_t *src1, const uint16_t *src2,
1577 int width, enum PixelFormat origin)
1581 for (i = 0; i < width; i++) {
1582 int r_b = input_pixel(&src1[i*3+0]);
1583 int g = input_pixel(&src1[i*3+1]);
1584 int b_r = input_pixel(&src1[i*3+2]);
1586 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1587 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Like rgb48ToUV_c_template but horizontally subsampled: averages each
 * pair of adjacent input pixels before computing one U/V sample. */
1591 static av_always_inline void
1592 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1593 const uint16_t *src1, const uint16_t *src2,
1594 int width, enum PixelFormat origin)
1598 for (i = 0; i < width; i++) {
1599 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1600 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1601 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1603 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1604 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/* Emit the uint8_t-typed ToY/ToUV/ToUV_half entry points for one 48-bit
 * format (casting to uint16_t and forwarding to the templates above), then
 * instantiate for RGB48/BGR48 in both endiannesses. */
1612 #define rgb48funcs(pattern, BE_LE, origin) \
1613 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1614 int width, uint32_t *unused) \
1616 const uint16_t *src = (const uint16_t *) _src; \
1617 uint16_t *dst = (uint16_t *) _dst; \
1618 rgb48ToY_c_template(dst, src, width, origin); \
1621 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1622 const uint8_t *_src1, const uint8_t *_src2, \
1623 int width, uint32_t *unused) \
1625 const uint16_t *src1 = (const uint16_t *) _src1, \
1626 *src2 = (const uint16_t *) _src2; \
1627 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1628 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1631 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1632 const uint8_t *_src1, const uint8_t *_src2, \
1633 int width, uint32_t *unused) \
1635 const uint16_t *src1 = (const uint16_t *) _src1, \
1636 *src2 = (const uint16_t *) _src2; \
1637 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1638 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1641 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1642 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1643 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1644 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
/* Read one source pixel for the 16/32-bit packed readers: a native 32-bit
 * load for the 8888 formats, otherwise a 16-bit load honoring endianness. */
1646 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1647 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1648 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/* Generic packed RGB (16/15/12-bit and 8888) -> 15-bit luma reader.
 * shr/shg/shb + mask* extract the components, shp pre-shifts the pixel
 * (for the *_1 32-bit layouts), rsh/gsh/bsh scale the coefficients and S
 * is the total fixed-point shift; all constant-folded per instantiation. */
1650 static av_always_inline void
1651 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
1652 int width, enum PixelFormat origin,
1653 int shr, int shg, int shb, int shp,
1654 int maskr, int maskg, int maskb,
1655 int rsh, int gsh, int bsh, int S)
1657 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1658 rnd = (32<<((S)-1)) + (1<<(S-7));
1661 for (i = 0; i < width; i++) {
1662 int px = input_pixel(i) >> shp;
1663 int b = (px & maskb) >> shb;
1664 int g = (px & maskg) >> shg;
1665 int r = (px & maskr) >> shr;
1667 dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
/* Generic packed RGB -> 15-bit chroma reader, full width (one U/V per
 * input pixel); parameters as in rgb16_32ToY_c_template. */
1671 static av_always_inline void
1672 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
1673 const uint8_t *src, int width,
1674 enum PixelFormat origin,
1675 int shr, int shg, int shb, int shp,
1676 int maskr, int maskg, int maskb,
1677 int rsh, int gsh, int bsh, int S)
1679 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1680 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1681 rnd = (256<<((S)-1)) + (1<<(S-7));
1684 for (i = 0; i < width; i++) {
1685 int px = input_pixel(i) >> shp;
1686 int b = (px & maskb) >> shb;
1687 int g = (px & maskg) >> shg;
1688 int r = (px & maskr) >> shr;
1690 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
1691 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
/* Horizontally subsampled variant: sums each pair of adjacent pixels
 * (green via maskgx, red+blue via the widened masks) before computing one
 * U/V sample; the extra bit from the sum is absorbed by the +1 in the
 * final shift.
 * NOTE(review): an else-branch between the maskb and maskg extractions
 * appears elided in this copy — code kept byte-identical. */
1695 static av_always_inline void
1696 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
1697 const uint8_t *src, int width,
1698 enum PixelFormat origin,
1699 int shr, int shg, int shb, int shp,
1700 int maskr, int maskg, int maskb,
1701 int rsh, int gsh, int bsh, int S)
1703 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1704 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1705 rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
1708 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1709 for (i = 0; i < width; i++) {
1710 int px0 = input_pixel(2 * i + 0) >> shp;
1711 int px1 = input_pixel(2 * i + 1) >> shp;
1712 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1713 int rb = px0 + px1 - g;
1715 b = (rb & maskb) >> shb;
1716 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1717 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1720 g = (g & maskg) >> shg;
1722 r = (rb & maskr) >> shr;
1724 dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
1725 dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
/* Emit the ToY/ToUV/ToUV_half entry points for one packed 16/32-bit
 * format, forwarding to the rgb16_32 templates with all shift/mask
 * parameters as compile-time constants. */
1731 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1732 maskg, maskb, rsh, gsh, bsh, S) \
1733 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1734 int width, uint32_t *unused) \
1736 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1737 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1740 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1741 const uint8_t *src, const uint8_t *dummy, \
1742 int width, uint32_t *unused) \
1744 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1745 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1748 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1749 const uint8_t *src, const uint8_t *dummy, \
1750 int width, uint32_t *unused) \
1752 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1753 maskr, maskg, maskb, rsh, gsh, bsh, S); \
/* Instantiate packed-RGB readers; columns are:
 * fmt, name, shr, shg, shb, shp, maskr, maskg, maskb, rsh, gsh, bsh, S. */
1756 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1757 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1758 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1759 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1760 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1761 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1762 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1763 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1764 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1765 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1766 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1767 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha channel of packed ABGR (alpha is byte 0 of each 4-byte
 * pixel) and widen it to swscale's 15-bit intermediate range (<< 6).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i]<<6;
    }
}
/* Extract the alpha channel of packed RGBA/BGRA (alpha is byte 3 of each
 * 4-byte pixel) and widen it to the 15-bit intermediate range (<< 6).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i+3]<<6;
    }
}
/* PAL8 -> alpha: look each palette index up in pal[] (entries are ARGB
 * words, alpha in bits 31..24) and widen to the 15-bit range (<< 6).
 * Fix: restores the elided braces and the `d = src[i]` index load. */
static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= (pal[d] >> 24)<<6;
    }
}
/* PAL8 -> luma: the low byte of each palette entry is the Y value, widened
 * to the 15-bit range (<< 6).
 * NOTE(review): `width` is `long` here while the sibling readers use `int`;
 * kept as-is to preserve the external interface.
 * Fix: restores the elided braces and the `d = src[i]` index load. */
static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= (pal[d] & 0xFF)<<6;
    }
}
/* PAL8 -> chroma: palette entries carry U in bits 15..8 and V in bits
 * 23..16; both are widened to the 15-bit range (<< 6).  src1 and src2 must
 * alias (PAL8 has a single index plane).
 * NOTE(review): dstU is uint16_t* while dstV is int16_t* — inconsistent
 * with the other readers but kept to preserve the external interface.
 * Fix: restores the elided braces and loop-index declaration. */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= (uint8_t)(p>> 8)<<6;
        dstV[i]= (uint8_t)(p>>16)<<6;
    }
}
/* 1-bit-per-pixel monochrome (0 = white) -> 15-bit luma: each bit expands
 * to 0 or 16383; the byte is inverted so a 0 bit yields full luma.  The
 * second loop handles a trailing partial byte when width is not a multiple
 * of 8.
 * Fix: restores the elided braces, declarations and the inverted byte
 * load / tail-byte guard (reconstructed — confirm against upstream). */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
    if (width&7) {
        int d= ~src[i];
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
}
/* 1-bit-per-pixel monochrome (1 = white) -> 15-bit luma: each set bit
 * expands to 16383.  The second loop handles a trailing partial byte when
 * width is not a multiple of 8.
 * Fix: restores the elided braces, declarations and the byte load /
 * tail-byte guard (reconstructed — confirm against upstream). */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for (j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
    if (width&7) {
        int d= src[i];
        for(j=0; j<(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*16383;
    }
}
1849 //FIXME yuy2* can read up to 7 samples too much
/* YUY2 (Y U Y V ...) -> luma plane: Y samples sit at even byte offsets.
 * Fix: restores the elided trailing parameter, braces and loop body
 * (reconstructed — confirm against upstream). */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
}
/* YUY2 (Y U Y V ...) -> chroma planes: U at byte 1, V at byte 3 of each
 * 4-byte macropixel.  src1 and src2 must alias (single packed plane).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
    assert(src1 == src2);
}
/* Byte-swap one line of 16-bit luma samples (endianness conversion input
 * reader).  av_bswap16 comes from libavutil/bswap.h (included at file top).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i=0; i<width; i++) {
        dst[i] = av_bswap16(src[i]);
    }
}
/* Byte-swap one line of 16-bit chroma samples (separate U and V planes).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    for (i=0; i<width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
    }
}
1893 /* This is almost identical to the previous, and exists only because
1894 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY (U Y V Y ...) -> luma plane: Y samples sit at odd byte offsets.
 * Fix: restores the elided trailing parameter, braces and loop body
 * (reconstructed — confirm against upstream). */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i + 1];
}
/* UYVY (U Y V Y ...) -> chroma planes: U at byte 0, V at byte 2 of each
 * 4-byte macropixel.  src1 and src2 must alias (single packed plane).
 * Fix: restores the elided function body braces and loop-index declaration. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
    assert(src1 == src2);
}
1914 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1915 const uint8_t *src, int width)
1918 for (i = 0; i < width; i++) {
1919 dst1[i] = src[2*i+0];
1920 dst2[i] = src[2*i+1];
/* NV12 chroma reader: interleaved order is U,V, so the even bytes go to
 * dstU.  Fix: restores the elided function body braces. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
/* NV21 chroma reader: interleaved order is V,U, so the destination planes
 * are swapped relative to NV12.  Fix: restores the elided braces. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
/* Re-definition for the following readers: read one 16-bit value honoring
 * the origin format's endianness. */
1938 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1940 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1941 int width, uint32_t *unused)
1944 for (i=0; i<width; i++) {
1949 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1953 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1954 const uint8_t *src2, int width, uint32_t *unused)
1957 for (i=0; i<width; i++) {
1958 int b= src1[3*i + 0];
1959 int g= src1[3*i + 1];
1960 int r= src1[3*i + 2];
1962 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1963 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
1965 assert(src1 == src2);
1968 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1969 const uint8_t *src2, int width, uint32_t *unused)
1972 for (i=0; i<width; i++) {
1973 int b= src1[6*i + 0] + src1[6*i + 3];
1974 int g= src1[6*i + 1] + src1[6*i + 4];
1975 int r= src1[6*i + 2] + src1[6*i + 5];
1977 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1978 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
1980 assert(src1 == src2);
1983 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
1987 for (i=0; i<width; i++) {
1992 dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
1996 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
1997 const uint8_t *src2, int width, uint32_t *unused)
2001 for (i=0; i<width; i++) {
2002 int r= src1[3*i + 0];
2003 int g= src1[3*i + 1];
2004 int b= src1[3*i + 2];
2006 dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
2007 dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
2011 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
2012 const uint8_t *src2, int width, uint32_t *unused)
2016 for (i=0; i<width; i++) {
2017 int r= src1[6*i + 0] + src1[6*i + 3];
2018 int g= src1[6*i + 1] + src1[6*i + 4];
2019 int b= src1[6*i + 2] + src1[6*i + 5];
2021 dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
2022 dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
/* Horizontal scaling of 16-bit input to 19-bit intermediates (int32
 * output buffer): per output sample, apply a filterSize-tap FIR at
 * filterPos[i] and clip to (1<<19)-1.  `sh` compensates for the source
 * bit depth; low-depth RGB/PAL sources force a fixed shift.
 * NOTE(review): the declaration of `sh` and some braces appear elided in
 * this copy — code kept byte-identical. */
2026 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
2027 const int16_t *filter,
2028 const int16_t *filterPos, int filterSize)
2031 int32_t *dst = (int32_t *) _dst;
2032 const uint16_t *src = (const uint16_t *) _src;
2033 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2036 if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2039 for (i = 0; i < dstW; i++) {
2041 int srcPos = filterPos[i];
2044 for (j = 0; j < filterSize; j++) {
2045 val += src[srcPos + j] * filter[filterSize * i + j];
2047 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
2048 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
/* Horizontal scaling of 16-bit input to 15-bit intermediates: same FIR as
 * hScale16To19_c but clipping to (1<<15)-1, with the shift derived from
 * the source depth.
 * NOTE(review): lines between the `sh` computation and the loop appear
 * elided in this copy — code kept byte-identical. */
2052 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
2053 const int16_t *filter,
2054 const int16_t *filterPos, int filterSize)
2057 const uint16_t *src = (const uint16_t *) _src;
2058 int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2060 for (i = 0; i < dstW; i++) {
2062 int srcPos = filterPos[i];
2065 for (j = 0; j < filterSize; j++) {
2066 val += src[srcPos + j] * filter[filterSize * i + j];
2068 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
2069 dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
2073 // bilinear / bicubic scaling
2074 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
2075 const int16_t *filter, const int16_t *filterPos,
2079 for (i=0; i<dstW; i++) {
2081 int srcPos= filterPos[i];
2083 for (j=0; j<filterSize; j++) {
2084 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2086 //filter += hFilterSize;
2087 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
/* Generic 16-bit native-endian horizontal FIR with a caller-supplied
 * output shift, clipped to 15 bits.
 * Fix: restores the elided braces and accumulator declarations
 * (reconstructed — confirm against upstream). */
static inline void hScale16N_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                               const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int i, j;
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
    }
}
/* Like hScale16N_c but byte-swaps every source sample (opposite-endian
 * 16-bit input).
 * Fix: restores the elided braces and accumulator declarations
 * (reconstructed — confirm against upstream). */
static inline void hScale16NX_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                                const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int i, j;
    for (i=0; i<dstW; i++) {
        int srcPos= filterPos[i];
        int val=0;
        for (j=0; j<filterSize; j++) {
            val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j];
        }
        dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
    }
}
2121 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
2122 const int16_t *filter, const int16_t *filterPos,
2126 int32_t *dst = (int32_t *) _dst;
2127 for (i=0; i<dstW; i++) {
2129 int srcPos= filterPos[i];
2131 for (j=0; j<filterSize; j++) {
2132 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2134 //filter += hFilterSize;
2135 dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
2140 //FIXME all pal and rgb srcFormats could do this conversion as well
2141 //FIXME all scalers more complex than bilinear could do half of this transform
/* Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain; FFMIN clamps the input so the fixed-point multiply
 * cannot exceed full range.
 * Fix: restores the elided function body braces and loop-index declaration. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
    }
}
/* Compress full-range (JPEG) chroma to limited-range (MPEG) levels, in place,
 * on 15-bit intermediate samples. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *const planes[2] = { dstU, dstV };
    int p, i;
    for (p = 0; p < 2; p++) {
        int16_t *d = planes[p];
        for (i = 0; i < width; i++)
            d[i] = (d[i] * 1799 + 4081085) >> 11;
    }
}
/* Expand limited-range (MPEG) luma to full-range (JPEG) levels, in place,
 * on 15-bit intermediate samples; input clipped to keep the multiply 32-bit. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int16_t *const end = dst + width;
    for (; dst < end; dst++) {
        int y = *dst < 30189 ? *dst : 30189;
        *dst = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress full-range (JPEG) luma to limited-range (MPEG) levels, in place,
 * on 15-bit intermediate samples. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *const end = dst + width;
    for (; dst < end; dst++)
        *dst = (*dst * 14071 + 33561947) >> 14;
}
/* Expand limited-range chroma to full range for the 19-bit (int32) pipeline.
 * The multiply is done in 64 bits: the clipped sample (30775<<4) times 4663
 * is 2,296,061,200, which exceeds INT32_MAX, so a 32-bit multiply would be
 * signed integer overflow (undefined behavior). The result after >>12 fits
 * in 32 bits again. Buffers are int32_t passed through int16_t pointers. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int32_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);
        dstU[i] = (int32_t)(((int64_t)u * 4663 - ((int64_t)9289992 << 4)) >> 12);
        dstV[i] = (int32_t)(((int64_t)v * 4663 - ((int64_t)9289992 << 4)) >> 12);
    }
}
/* Compress full-range chroma to limited range for the 19-bit (int32)
 * pipeline. Buffers are int32_t passed through int16_t pointers. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *const planes[2] = { (int32_t *) _dstU, (int32_t *) _dstV };
    int p, i;
    for (p = 0; p < 2; p++) {
        int32_t *d = planes[p];
        for (i = 0; i < width; i++)
            d[i] = (d[i] * 1799 + (4081085 << 4)) >> 11;
    }
}
/* Expand limited-range luma to full range for the 19-bit (int32) pipeline.
 * 64-bit intermediate: the clipped sample (30189<<4) times 4769 is
 * 2,303,541,456 > INT32_MAX, so a 32-bit multiply would be signed integer
 * overflow (undefined behavior). After >>12 the result fits in 32 bits. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++) {
        int32_t y = dst[i] < (30189 << 4) ? dst[i] : (30189 << 4);
        dst[i] = (int32_t)(((int64_t)y * 4769 - ((int64_t)39057361 << 2)) >> 12);
    }
}
/* Compress full-range luma to limited range for the 19-bit (int32) pipeline.
 * The constants are the original's folded at compile time:
 * 14071/4 == 3517 and (33561947<<4)/4 == 33561947<<2. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *d = (int32_t *) _dst;
    int32_t *const end = d + width;
    for (; d < end; d++)
        *d = (*d * 3517 + (33561947 << 2)) >> 12;
}
2206 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2207 const uint8_t *src, int srcW, int xInc)
2210 unsigned int xpos=0;
2211 for (i=0;i<dstWidth;i++) {
2212 register unsigned int xx=xpos>>16;
2213 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2214 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2217 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2218 dst[i] = src[srcW-1]*128;
2221 // *** horizontal scale Y line to temp buffer
/*
 * Horizontally scale one luma (or alpha, when isAlpha) input line into the
 * 15-bit intermediate buffer `dst`, converting the input pixel format to
 * planar 8-bit first when an input converter is installed.
 * NOTE(review): this excerpt is missing several lines (conditional guards
 * and braces) relative to the full file; the code below is kept verbatim.
 */
2222 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2223 const uint8_t *src, int srcW, int xInc,
2224 const int16_t *hLumFilter,
2225 const int16_t *hLumFilterPos, int hLumFilterSize,
2226 uint8_t *formatConvBuffer,
2227 uint32_t *pal, int isAlpha)
/* pick the per-plane input converter and the optional range-conversion hook */
2229 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2230 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
/* convert the input line into formatConvBuffer and scale from there */
2233 toYV12(formatConvBuffer, src, srcW, pal);
2234 src= formatConvBuffer;
/* >8-bit source path: shift compensates for the source depth (13 for RGB/PAL8) */
2238 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2239 c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
2240 } else if (!c->hyscale_fast) {
/* generic filter-based horizontal scaler */
2241 c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2242 } else { // fast bilinear upscale / crap downscale
2243 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
/* optional MPEG<->JPEG range conversion applied to the scaled line */
2247 convertRange(dst, dstWidth);
2250 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2251 int dstWidth, const uint8_t *src1,
2252 const uint8_t *src2, int srcW, int xInc)
2255 unsigned int xpos=0;
2256 for (i=0;i<dstWidth;i++) {
2257 register unsigned int xx=xpos>>16;
2258 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2259 dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2260 dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2263 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
2264 dst1[i] = src1[srcW-1]*128;
2265 dst2[i] = src2[srcW-1]*128;
/*
 * Horizontally scale one pair of chroma input lines (U and V) into the
 * 15-bit intermediate buffers dst1/dst2, converting the input pixel format
 * first when a chroma converter is installed.
 * NOTE(review): this excerpt is missing several lines (guards, braces and
 * the `src2 = buf2` reassignment) relative to the full file; code kept verbatim.
 */
2269 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2270 const uint8_t *src1, const uint8_t *src2,
2271 int srcW, int xInc, const int16_t *hChrFilter,
2272 const int16_t *hChrFilterPos, int hChrFilterSize,
2273 uint8_t *formatConvBuffer, uint32_t *pal)
/* second conversion buffer lives after the first, 16-byte aligned */
2276 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
2277 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2278 src1= formatConvBuffer;
/* >8-bit source path: shift compensates for the source depth (13 for RGB/PAL8) */
2283 int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
2284 c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2285 c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
2286 } else if (!c->hcscale_fast) {
/* generic filter-based horizontal scaler, once per chroma plane */
2287 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2288 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2289 } else { // fast bilinear upscale / crap downscale
2290 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
/* optional MPEG<->JPEG range conversion applied to both scaled planes */
2293 if (c->chrConvertRange)
2294 c->chrConvertRange(dst1, dst2, dstWidth);
/*
 * Select the C output functions (planar and packed, 1-tap / 2-tap / N-tap
 * vertical variants) for the destination pixel format.
 * NOTE(review): this excerpt is missing many case labels, braces and #if
 * lines relative to the full file; the code below is kept verbatim.
 */
2297 static av_always_inline void
2298 find_c_packed_planar_out_funcs(SwsContext *c,
2299 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2300 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2301 yuv2packedX_fn *yuv2packedX)
2303 enum PixelFormat dstFormat = c->dstFormat;
/* planar output: pick by interleaving (NV12/21), depth and endianness */
2305 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2306 *yuv2yuvX = yuv2nv12X_c;
2307 } else if (is16BPS(dstFormat)) {
2308 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2309 } else if (is9_OR_10BPS(dstFormat)) {
2310 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2311 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2313 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2316 *yuv2yuv1 = yuv2yuv1_c;
2317 *yuv2yuvX = yuv2yuvX_c;
/* packed output, full horizontal chroma interpolation requested */
2319 if(c->flags & SWS_FULL_CHR_H_INT) {
2320 switch (dstFormat) {
2323 *yuv2packedX = yuv2rgba32_full_X_c;
2325 #if CONFIG_SWSCALE_ALPHA
2327 *yuv2packedX = yuv2rgba32_full_X_c;
2329 #endif /* CONFIG_SWSCALE_ALPHA */
2331 *yuv2packedX = yuv2rgbx32_full_X_c;
2333 #endif /* !CONFIG_SMALL */
2337 *yuv2packedX = yuv2argb32_full_X_c;
2339 #if CONFIG_SWSCALE_ALPHA
2341 *yuv2packedX = yuv2argb32_full_X_c;
2343 #endif /* CONFIG_SWSCALE_ALPHA */
2345 *yuv2packedX = yuv2xrgb32_full_X_c;
2347 #endif /* !CONFIG_SMALL */
2351 *yuv2packedX = yuv2bgra32_full_X_c;
2353 #if CONFIG_SWSCALE_ALPHA
2355 *yuv2packedX = yuv2bgra32_full_X_c;
2357 #endif /* CONFIG_SWSCALE_ALPHA */
2359 *yuv2packedX = yuv2bgrx32_full_X_c;
2361 #endif /* !CONFIG_SMALL */
2365 *yuv2packedX = yuv2abgr32_full_X_c;
2367 #if CONFIG_SWSCALE_ALPHA
2369 *yuv2packedX = yuv2abgr32_full_X_c;
2371 #endif /* CONFIG_SWSCALE_ALPHA */
2373 *yuv2packedX = yuv2xbgr32_full_X_c;
2375 #endif /* !CONFIG_SMALL */
2378 *yuv2packedX = yuv2rgb24_full_X_c;
2381 *yuv2packedX = yuv2bgr24_full_X_c;
/* packed output, regular (non-full-chroma) path: 1-tap/2-tap/N-tap trio */
2388 switch (dstFormat) {
2389 case PIX_FMT_GRAY16BE:
2390 *yuv2packed1 = yuv2gray16BE_1_c;
2391 *yuv2packed2 = yuv2gray16BE_2_c;
2392 *yuv2packedX = yuv2gray16BE_X_c;
2394 case PIX_FMT_GRAY16LE:
2395 *yuv2packed1 = yuv2gray16LE_1_c;
2396 *yuv2packed2 = yuv2gray16LE_2_c;
2397 *yuv2packedX = yuv2gray16LE_X_c;
2399 case PIX_FMT_MONOWHITE:
2400 *yuv2packed1 = yuv2monowhite_1_c;
2401 *yuv2packed2 = yuv2monowhite_2_c;
2402 *yuv2packedX = yuv2monowhite_X_c;
2404 case PIX_FMT_MONOBLACK:
2405 *yuv2packed1 = yuv2monoblack_1_c;
2406 *yuv2packed2 = yuv2monoblack_2_c;
2407 *yuv2packedX = yuv2monoblack_X_c;
2409 case PIX_FMT_YUYV422:
2410 *yuv2packed1 = yuv2yuyv422_1_c;
2411 *yuv2packed2 = yuv2yuyv422_2_c;
2412 *yuv2packedX = yuv2yuyv422_X_c;
2414 case PIX_FMT_UYVY422:
2415 *yuv2packed1 = yuv2uyvy422_1_c;
2416 *yuv2packed2 = yuv2uyvy422_2_c;
2417 *yuv2packedX = yuv2uyvy422_X_c;
2419 case PIX_FMT_RGB48LE:
2420 *yuv2packed1 = yuv2rgb48le_1_c;
2421 *yuv2packed2 = yuv2rgb48le_2_c;
2422 *yuv2packedX = yuv2rgb48le_X_c;
2424 case PIX_FMT_RGB48BE:
2425 *yuv2packed1 = yuv2rgb48be_1_c;
2426 *yuv2packed2 = yuv2rgb48be_2_c;
2427 *yuv2packedX = yuv2rgb48be_X_c;
2429 case PIX_FMT_BGR48LE:
2430 *yuv2packed1 = yuv2bgr48le_1_c;
2431 *yuv2packed2 = yuv2bgr48le_2_c;
2432 *yuv2packedX = yuv2bgr48le_X_c;
2434 case PIX_FMT_BGR48BE:
2435 *yuv2packed1 = yuv2bgr48be_1_c;
2436 *yuv2packed2 = yuv2bgr48be_2_c;
2437 *yuv2packedX = yuv2bgr48be_X_c;
2442 *yuv2packed1 = yuv2rgb32_1_c;
2443 *yuv2packed2 = yuv2rgb32_2_c;
2444 *yuv2packedX = yuv2rgb32_X_c;
2446 #if CONFIG_SWSCALE_ALPHA
2448 *yuv2packed1 = yuv2rgba32_1_c;
2449 *yuv2packed2 = yuv2rgba32_2_c;
2450 *yuv2packedX = yuv2rgba32_X_c;
2452 #endif /* CONFIG_SWSCALE_ALPHA */
2454 *yuv2packed1 = yuv2rgbx32_1_c;
2455 *yuv2packed2 = yuv2rgbx32_2_c;
2456 *yuv2packedX = yuv2rgbx32_X_c;
2458 #endif /* !CONFIG_SMALL */
2460 case PIX_FMT_RGB32_1:
2461 case PIX_FMT_BGR32_1:
2463 *yuv2packed1 = yuv2rgb32_1_1_c;
2464 *yuv2packed2 = yuv2rgb32_1_2_c;
2465 *yuv2packedX = yuv2rgb32_1_X_c;
2467 #if CONFIG_SWSCALE_ALPHA
2469 *yuv2packed1 = yuv2rgba32_1_1_c;
2470 *yuv2packed2 = yuv2rgba32_1_2_c;
2471 *yuv2packedX = yuv2rgba32_1_X_c;
2473 #endif /* CONFIG_SWSCALE_ALPHA */
2475 *yuv2packed1 = yuv2rgbx32_1_1_c;
2476 *yuv2packed2 = yuv2rgbx32_1_2_c;
2477 *yuv2packedX = yuv2rgbx32_1_X_c;
2479 #endif /* !CONFIG_SMALL */
2482 *yuv2packed1 = yuv2rgb24_1_c;
2483 *yuv2packed2 = yuv2rgb24_2_c;
2484 *yuv2packedX = yuv2rgb24_X_c;
2487 *yuv2packed1 = yuv2bgr24_1_c;
2488 *yuv2packed2 = yuv2bgr24_2_c;
2489 *yuv2packedX = yuv2bgr24_X_c;
2491 case PIX_FMT_RGB565LE:
2492 case PIX_FMT_RGB565BE:
2493 case PIX_FMT_BGR565LE:
2494 case PIX_FMT_BGR565BE:
2495 *yuv2packed1 = yuv2rgb16_1_c;
2496 *yuv2packed2 = yuv2rgb16_2_c;
2497 *yuv2packedX = yuv2rgb16_X_c;
2499 case PIX_FMT_RGB555LE:
2500 case PIX_FMT_RGB555BE:
2501 case PIX_FMT_BGR555LE:
2502 case PIX_FMT_BGR555BE:
2503 *yuv2packed1 = yuv2rgb15_1_c;
2504 *yuv2packed2 = yuv2rgb15_2_c;
2505 *yuv2packedX = yuv2rgb15_X_c;
2507 case PIX_FMT_RGB444LE:
2508 case PIX_FMT_RGB444BE:
2509 case PIX_FMT_BGR444LE:
2510 case PIX_FMT_BGR444BE:
2511 *yuv2packed1 = yuv2rgb12_1_c;
2512 *yuv2packed2 = yuv2rgb12_2_c;
2513 *yuv2packedX = yuv2rgb12_X_c;
2517 *yuv2packed1 = yuv2rgb8_1_c;
2518 *yuv2packed2 = yuv2rgb8_2_c;
2519 *yuv2packedX = yuv2rgb8_X_c;
2523 *yuv2packed1 = yuv2rgb4_1_c;
2524 *yuv2packed2 = yuv2rgb4_2_c;
2525 *yuv2packedX = yuv2rgb4_X_c;
2527 case PIX_FMT_RGB4_BYTE:
2528 case PIX_FMT_BGR4_BYTE:
2529 *yuv2packed1 = yuv2rgb4b_1_c;
2530 *yuv2packed2 = yuv2rgb4b_2_c;
2531 *yuv2packedX = yuv2rgb4b_X_c;
2537 #define DEBUG_SWSCALE_BUFFERS 0
2538 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
/*
 * Main scaling loop: consumes one horizontal slice of the source picture
 * and emits as many destination lines as the vertical filters allow.
 * Horizontally scaled lines are cached in ring buffers (lumPixBuf etc.) so
 * vertical filtering can reuse them across output lines and slices.
 * Returns the number of destination lines written in this call.
 * NOTE(review): this excerpt is missing numerous lines (declarations of
 * dstY/lastDstY/enough_lines, braces, #if guards) relative to the full file;
 * the code below is kept verbatim.
 */
2540 static int swScale(SwsContext *c, const uint8_t* src[],
2541 int srcStride[], int srcSliceY,
2542 int srcSliceH, uint8_t* dst[], int dstStride[])
2544 /* load a few things into local vars to make the code more readable and faster */
2545 const int srcW= c->srcW;
2546 const int dstW= c->dstW;
2547 const int dstH= c->dstH;
2548 const int chrDstW= c->chrDstW;
2549 const int chrSrcW= c->chrSrcW;
2550 const int lumXInc= c->lumXInc;
2551 const int chrXInc= c->chrXInc;
2552 const enum PixelFormat dstFormat= c->dstFormat;
2553 const int flags= c->flags;
2554 int16_t *vLumFilterPos= c->vLumFilterPos;
2555 int16_t *vChrFilterPos= c->vChrFilterPos;
2556 int16_t *hLumFilterPos= c->hLumFilterPos;
2557 int16_t *hChrFilterPos= c->hChrFilterPos;
2558 int16_t *vLumFilter= c->vLumFilter;
2559 int16_t *vChrFilter= c->vChrFilter;
2560 int16_t *hLumFilter= c->hLumFilter;
2561 int16_t *hChrFilter= c->hChrFilter;
2562 int32_t *lumMmxFilter= c->lumMmxFilter;
2563 int32_t *chrMmxFilter= c->chrMmxFilter;
2564 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2565 const int vLumFilterSize= c->vLumFilterSize;
2566 const int vChrFilterSize= c->vChrFilterSize;
2567 const int hLumFilterSize= c->hLumFilterSize;
2568 const int hChrFilterSize= c->hChrFilterSize;
2569 int16_t **lumPixBuf= c->lumPixBuf;
2570 int16_t **chrUPixBuf= c->chrUPixBuf;
2571 int16_t **chrVPixBuf= c->chrVPixBuf;
2572 int16_t **alpPixBuf= c->alpPixBuf;
2573 const int vLumBufSize= c->vLumBufSize;
2574 const int vChrBufSize= c->vChrBufSize;
2575 uint8_t *formatConvBuffer= c->formatConvBuffer;
/* chroma slice geometry, derived from the luma slice and subsampling */
2576 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2577 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2579 uint32_t *pal=c->pal_yuv;
2581 int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
2582 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2583 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2584 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2585 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2586 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2588 /* vars which will change and which we need to store back in the context */
2590 int lumBufIndex= c->lumBufIndex;
2591 int chrBufIndex= c->chrBufIndex;
2592 int lastInLumBuf= c->lastInLumBuf;
2593 int lastInChrBuf= c->lastInChrBuf;
/* packed input: all planes alias plane 0; vChrDrop skips chroma lines */
2595 if (isPacked(c->srcFormat)) {
2603 srcStride[3]= srcStride[0];
2605 srcStride[1]<<= c->vChrDrop;
2606 srcStride[2]<<= c->vChrDrop;
2608 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2609 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2610 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2611 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2612 srcSliceY, srcSliceH, dstY, dstH);
2613 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2614 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2616 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2617 static int warnedAlready=0; //FIXME move this into the context perhaps
2618 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2619 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2620 " ->cannot do aligned memory accesses anymore\n");
2625 /* Note the user might start scaling the picture in the middle so this
2626 will not get executed. This is not really intended but works
2627 currently, so people might do it. */
2628 if (srcSliceY ==0) {
2636 if (!should_dither) {
2637 c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
/* ------- main per-output-line loop ------- */
2641 for (;dstY < dstH; dstY++) {
2642 const int chrDstY= dstY>>c->chrDstVSubSample;
2643 uint8_t *dest[4] = {
2644 dst[0] + dstStride[0] * dstY,
2645 dst[1] + dstStride[1] * chrDstY,
2646 dst[2] + dstStride[2] * chrDstY,
2647 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2650 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2651 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2652 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2653 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2654 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2655 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2658 //handle holes (FAST_BILINEAR & weird filters)
2659 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2660 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2661 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2662 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2664 DEBUG_BUFFERS("dstY: %d\n", dstY);
2665 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2666 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2667 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2668 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2670 // Do we have enough lines in this slice to output the dstY line
2671 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2673 if (!enough_lines) {
/* not enough input yet: just buffer what this slice provides */
2674 lastLumSrcY = srcSliceY + srcSliceH - 1;
2675 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2676 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2677 lastLumSrcY, lastChrSrcY);
2680 //Do horizontal scaling
2681 while(lastInLumBuf < lastLumSrcY) {
2682 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2683 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2685 assert(lumBufIndex < 2*vLumBufSize);
2686 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2687 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2688 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2689 hLumFilter, hLumFilterPos, hLumFilterSize,
/* alpha plane is scaled with the luma path when present */
2692 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2693 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2694 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2698 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2699 lumBufIndex, lastInLumBuf);
2701 while(lastInChrBuf < lastChrSrcY) {
2702 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2703 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2705 assert(chrBufIndex < 2*vChrBufSize);
2706 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2707 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2708 //FIXME replace parameters through context struct (some at least)
2710 if (c->needs_hcscale)
2711 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2712 chrDstW, src1, src2, chrSrcW, chrXInc,
2713 hChrFilter, hChrFilterPos, hChrFilterSize,
2714 formatConvBuffer, pal);
2716 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2717 chrBufIndex, lastInChrBuf);
2719 //wrap buf index around to stay inside the ring buffer
2720 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2721 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2723 break; //we can't output a dstY line so let's try with the next slice
2726 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2728 if (should_dither) {
2729 c->chrDither8 = dither_8x8_128[chrDstY & 7];
2730 c->lumDither8 = dither_8x8_128[dstY & 7];
2732 if (dstY >= dstH-2) {
2733 // hmm looks like we can't use MMX here without overwriting this array's tail
2734 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2735 &yuv2packed1, &yuv2packed2,
/* ring-buffer window pointers for the vertical filter input lines */
2740 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2741 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2742 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2743 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2745 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2746 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2747 if ((dstY&chrSkipMask) || isGray(dstFormat))
2748 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2749 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2750 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2751 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2752 dest, dstW, chrDstW);
2753 } else { //General YV12
2754 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2755 lumSrcPtr, vLumFilterSize,
2756 vChrFilter + chrDstY * vChrFilterSize,
2757 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2758 alpSrcPtr, dest, dstW, chrDstW);
2761 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2762 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2763 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2764 int chrAlpha = vChrFilter[2 * dstY + 1];
2765 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2766 alpPixBuf ? *alpSrcPtr : NULL,
2767 dest[0], dstW, chrAlpha, dstY);
2768 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2769 int lumAlpha = vLumFilter[2 * dstY + 1];
2770 int chrAlpha = vChrFilter[2 * dstY + 1];
2772 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2774 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2775 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2776 alpPixBuf ? alpSrcPtr : NULL,
2777 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2778 } else { //general RGB
2779 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2780 lumSrcPtr, vLumFilterSize,
2781 vChrFilter + dstY * vChrFilterSize,
2782 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2783 alpSrcPtr, dest[0], dstW, dstY);
/* destination wants alpha but the source has none: fill it opaque */
2789 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2790 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2793 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2794 __asm__ volatile("sfence":::"memory");
2798 /* store changed local vars back in the context */
2800 c->lumBufIndex= lumBufIndex;
2801 c->chrBufIndex= chrBufIndex;
2802 c->lastInLumBuf= lastInLumBuf;
2803 c->lastInChrBuf= lastInChrBuf;
2805 return dstY - lastDstY;
/*
 * One-time initialization of the C function pointers in the context:
 * input converters (lumToYV12/chrToYV12/alpToYV12), horizontal scalers,
 * range converters and the output functions, all selected by pixel format.
 * NOTE(review): this excerpt is missing many case labels, braces and #if
 * lines relative to the full file; the code below is kept verbatim.
 */
2808 static av_cold void sws_init_swScale_c(SwsContext *c)
2810 enum PixelFormat srcFormat = c->srcFormat;
2812 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2813 &c->yuv2packed1, &c->yuv2packed2,
/* ---- chroma input converters ---- */
2816 c->chrToYV12 = NULL;
2818 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2819 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2820 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2821 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2825 case PIX_FMT_BGR4_BYTE:
2826 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
/* 9/10-bit planar: byte-swapping horizontal scaler only when the source
 * endianness differs from the host */
2827 case PIX_FMT_YUV444P9BE:
2828 case PIX_FMT_YUV420P9BE:
2829 case PIX_FMT_YUV444P10BE:
2830 case PIX_FMT_YUV422P10BE:
2831 case PIX_FMT_YUV420P10BE: c->hScale16= HAVE_BIGENDIAN ? NULL : hScale16NX_c; break;
2832 case PIX_FMT_YUV444P9LE:
2833 case PIX_FMT_YUV420P9LE:
2834 case PIX_FMT_YUV422P10LE:
2835 case PIX_FMT_YUV420P10LE:
2836 case PIX_FMT_YUV444P10LE: c->hScale16= HAVE_BIGENDIAN ? hScale16NX_c : NULL; break;
2838 case PIX_FMT_YUV420P16LE:
2839 case PIX_FMT_YUV422P16LE:
2840 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2842 case PIX_FMT_YUV420P16BE:
2843 case PIX_FMT_YUV422P16BE:
2844 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
/* horizontally subsampled chroma: use the averaging "_half" converters */
2847 if (c->chrSrcHSubSample) {
2849 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2850 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2851 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2852 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2853 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2854 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2855 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2856 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2857 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2858 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2859 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2860 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2861 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2862 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2863 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2864 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2865 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2866 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2870 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2871 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2872 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2873 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2874 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2875 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2876 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2877 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2878 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2879 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2880 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2881 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2882 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2883 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2884 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2885 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2886 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2887 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
/* ---- luma and alpha input converters ---- */
2891 c->lumToYV12 = NULL;
2892 c->alpToYV12 = NULL;
2893 switch (srcFormat) {
2895 case PIX_FMT_YUV420P16LE:
2896 case PIX_FMT_YUV422P16LE:
2897 case PIX_FMT_YUV444P16LE:
2898 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2900 case PIX_FMT_YUV420P16BE:
2901 case PIX_FMT_YUV422P16BE:
2902 case PIX_FMT_YUV444P16BE:
2903 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2905 case PIX_FMT_YUYV422 :
2906 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2907 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2908 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2909 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2910 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2911 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2912 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2913 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2914 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2915 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2916 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2917 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2921 case PIX_FMT_BGR4_BYTE:
2922 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2923 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2924 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2925 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2926 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2927 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2928 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2929 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2930 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2931 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2932 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2935 switch (srcFormat) {
2937 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2939 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2940 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2941 case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
/* ---- horizontal scaler selection by source/destination bit depth ---- */
2946 if (c->srcBpc == 8) {
2947 if (c->dstBpc <= 10) {
2948 if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2949 || c->srcFormat == PIX_FMT_PAL8)
2950 c->hScale16= hScale16N_c;
2951 c->hScale = hScale8To15_c;
2952 if (c->flags & SWS_FAST_BILINEAR) {
2953 c->hyscale_fast = hyscale_fast_c;
2954 c->hcscale_fast = hcscale_fast_c;
2957 c->hScale = hScale8To19_c;
2958 av_assert0(c->hScale16 != hScale16N_c && c->hScale16 != hScale16NX_c);
2962 if((isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
2963 || c->srcFormat == PIX_FMT_PAL8)
2964 c->hScale16= hScale16N_c;
/* byte-swapping scaler chosen but input is planar YUV: swap during the
 * input conversion instead and use the native-endian scaler */
2965 if(c->hScale16 == hScale16NX_c && !isAnyRGB(c->srcFormat)){
2966 c->chrToYV12 = bswap16UV_c;
2967 c->lumToYV12 = bswap16Y_c;
2971 c->hScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
/* ---- MPEG<->JPEG range conversion (YUV destinations only) ---- */
2974 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2975 if (c->dstBpc <= 10) {
2977 c->lumConvertRange = lumRangeFromJpeg_c;
2978 c->chrConvertRange = chrRangeFromJpeg_c;
2980 c->lumConvertRange = lumRangeToJpeg_c;
2981 c->chrConvertRange = chrRangeToJpeg_c;
2985 c->lumConvertRange = lumRangeFromJpeg16_c;
2986 c->chrConvertRange = chrRangeFromJpeg16_c;
2988 c->lumConvertRange = lumRangeToJpeg16_c;
2989 c->chrConvertRange = chrRangeToJpeg16_c;
/* grey/mono formats have no chroma to scale */
2994 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2995 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2996 c->needs_hcscale = 1;
2999 SwsFunc ff_getSwsFunc(SwsContext *c)
3001 sws_init_swScale_c(c);
3004 ff_sws_init_swScale_mmx(c);
3006 ff_sws_init_swScale_altivec(c);