@section Submitting patches
-First, read the (@pxref{Coding Rules}) above if you did not yet.
+First, read the @ref{Coding Rules} above if you have not done so yet, in
+particular the rules regarding patch submission.
When you submit your patch, please use @code{git format-patch} or
@code{git send-email}. We cannot read other diffs :-)
Use the patcheck tool of FFmpeg to check your patch.
The tool is located in the tools directory.
-Run the regression tests before submitting a patch so that you can
-verify that there are no big problems.
+Run the @ref{Regression Tests} before submitting a patch in order to verify
+it does not cause unexpected problems.
Patches should be posted as base64 encoded attachments (or any other
encoding which ensures that the patch will not be trashed during
This filter corresponds to the libopencv function @code{cvErode}.
The filter accepts the parameters: @var{struct_el}:@var{nb_iterations},
-with the same meaning and use of those of the dilate filter
-(@pxref{dilate}).
+with the same syntax and semantics as the @ref{dilate} filter.
@subsection smooth
setdar=1.77777
@end example
-See also the "setsar" filter documentation (@pxref{setsar}).
+See also the @ref{setsar} filter documentation.
@section setpts
the form @var{num}/@var{den} or a frame rate abbreviation.
@var{src_name} is the name to the frei0r source to load. For more
information regarding frei0r and how to set the parameters read the
-section "frei0r" (@pxref{frei0r}) in the description of the video
-filters.
+section @ref{frei0r} in the description of the video filters.
Some examples follow:
@example
ffmpeg -i INPUT -acodec pcm_u8 -vcodec mpeg2video -f crc -
@end example
-See also the @code{framecrc} muxer (@pxref{framecrc}).
+See also the @ref{framecrc} muxer.
@anchor{framecrc}
@section framecrc
ffmpeg -i INPUT -acodec pcm_u8 -vcodec mpeg2video -f framecrc -
@end example
-See also the @code{crc} muxer (@pxref{crc}).
+See also the @ref{crc} muxer.
@section image2
s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g;
s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;
s/;\s+\@pxref\{(?:[^\}]*)\}//g;
+ s/\@ref\{([^\}]*)\}/$1/g;
s/\@noindent\s*//g;
s/\@refill//g;
s/\@gol//g;
prefetch_motion(h, 1, pixel_shift, chroma444);
}
-#define hl_motion_fn(sh, bits) \
-static av_always_inline void hl_motion_ ## bits(H264Context *h, \
- uint8_t *dest_y, \
- uint8_t *dest_cb, uint8_t *dest_cr, \
- qpel_mc_func (*qpix_put)[16], \
- h264_chroma_mc_func (*chroma_put), \
- qpel_mc_func (*qpix_avg)[16], \
- h264_chroma_mc_func (*chroma_avg), \
- h264_weight_func *weight_op, \
- h264_biweight_func *weight_avg, \
- int chroma444) \
-{ \
- hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
- qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
-}
-hl_motion_fn(0, 8);
-hl_motion_fn(1, 16);
-
static void free_tables(H264Context *h, int free_rbsp){
int i;
H264Context *hx;
ff_thread_finish_setup(s->avctx);
}
-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
+static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
MpegEncContext * const s = &h->s;
uint8_t *top_border;
int top_idx = 1;
}
}
-static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
+static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize,
int xchg, int chroma444,
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
}else if(is_h264){
- if (pixel_shift) {
- hl_motion_16(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab, 0);
- } else
- hl_motion_8(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab, 0);
+ hl_motion(h, dest_y, dest_cb, dest_cr,
+ s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+ s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+ h->h264dsp.weight_h264_pixels_tab,
+ h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0);
}
hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
if(h->deblocking_filter)
xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
}else{
- if (pixel_shift) {
- hl_motion_16(h, dest[0], dest[1], dest[2],
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab, 1);
- } else
- hl_motion_8(h, dest[0], dest[1], dest[2],
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab, 1);
+ hl_motion(h, dest[0], dest[1], dest[2],
+ s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+ s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+ h->h264dsp.weight_h264_pixels_tab,
+ h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1);
}
for (p = 0; p < plane_count; p++)
}
}
}
- h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
+ h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
+ - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
+ + 6 * (h->sps.bit_depth_luma - 8);
#if 0 //FMO
if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
/**
* gets the chroma qp.
*/
-static inline int get_chroma_qp(H264Context *h, int t, int qscale){
+static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
return h->pps.chroma_qp_table[t][qscale];
}
-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
+static av_always_inline void pred_pskip_motion(H264Context * const h);
static void fill_decode_neighbors(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
}
else
{
- int mx, my;
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
fill_decode_neighbors(h, mb_type);
- fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
- pred_pskip_motion(h, &mx, &my);
- fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
- fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+ pred_pskip_motion(h);
}
write_back_motion(h, mb_type);
}
}
} else {
- uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
- nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+ fill_rectangle(&h->non_zero_count_cache[scan8[4*i8x8+16*p]], 2, 2, 8, 0, 1);
}
}
}
MpegEncContext * const s = &h->s;
int mb_xy;
int mb_type, left_type, top_type;
- int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+ int qp, qp0, qp1, qpc, qpc0, qpc1;
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
int chroma444 = CHROMA444;
qp1 = (qp + qp1 + 1) >> 1;
qpc0 = (qpc + qpc0 + 1) >> 1;
qpc1 = (qpc + qpc1 + 1) >> 1;
- qp_thresh = 15+52 - h->slice_alpha_c0_offset;
- if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
- qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
- return;
if( IS_INTRA(mb_type) ) {
static const int16_t bS4[4] = {4,4,4,4};
} else {
LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
int edges;
- if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
+ if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 && !chroma444 ) {
edges = 4;
AV_WN64A(bS[0][0], 0x0002000200020002ULL);
AV_WN64A(bS[0][2], 0x0002000200020002ULL);
//#undef NDEBUG
#include <assert.h>
-static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
MpegEncContext *s = &h->s;
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
const int index8= scan8[n];
const int top_ref= h->ref_cache[list][ index8 - 8 ];
const int left_ref= h->ref_cache[list][ index8 - 1 ];
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
* @param mx the x component of the predicted motion vector
* @param my the y component of the predicted motion vector
*/
-static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
if(n==0){
const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
pred_motion(h, n, 2, list, ref, mx, my);
}
-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
- const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
- const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
+#define FIX_MV_MBAFF(type, refn, mvn, idx)\
+ if(FRAME_MBAFF){\
+ if(MB_FIELD){\
+ if(!IS_INTERLACED(type)){\
+ refn <<= 1;\
+ AV_COPY32(mvbuf[idx], mvn);\
+ mvbuf[idx][1] /= 2;\
+ mvn = mvbuf[idx];\
+ }\
+ }else{\
+ if(IS_INTERLACED(type)){\
+ refn >>= 1;\
+ AV_COPY32(mvbuf[idx], mvn);\
+ mvbuf[idx][1] <<= 1;\
+ mvn = mvbuf[idx];\
+ }\
+ }\
+ }
- tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+static av_always_inline void pred_pskip_motion(H264Context * const h){
+ DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
+ DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
+ MpegEncContext * const s = &h->s;
+ int8_t *ref = s->current_picture.ref_index[0];
+ int16_t (*mv)[2] = s->current_picture.motion_val[0];
+ int top_ref, left_ref, diagonal_ref, match_count, mx, my;
+ const int16_t *A, *B, *C;
+ int b_stride = h->b_stride;
+
+ fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+
+ /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
+ * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
+ * faster this way. Is there a way to avoid this duplication?
+ */
+ if(USES_LIST(h->left_type[LTOP], 0)){
+ left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
+ A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
+ FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
+ if(!(left_ref | AV_RN32A(A))){
+ goto zeromv;
+ }
+ }else if(h->left_type[LTOP]){
+ left_ref = LIST_NOT_USED;
+ A = zeromv;
+ }else{
+ goto zeromv;
+ }
- if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
- || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
- || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
+ if(USES_LIST(h->top_type, 0)){
+ top_ref = ref[4*h->top_mb_xy + 2];
+ B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
+ FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
+ if(!(top_ref | AV_RN32A(B))){
+ goto zeromv;
+ }
+ }else if(h->top_type){
+ top_ref = LIST_NOT_USED;
+ B = zeromv;
+ }else{
+ goto zeromv;
+ }
+
+ tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
- *mx = *my = 0;
- return;
+ if(USES_LIST(h->topright_type, 0)){
+ diagonal_ref = ref[4*h->topright_mb_xy + 2];
+ C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
+ FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
+ }else if(h->topright_type){
+ diagonal_ref = LIST_NOT_USED;
+ C = zeromv;
+ }else{
+ if(USES_LIST(h->topleft_type, 0)){
+ diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
+ C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
+ FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
+ }else if(h->topleft_type){
+ diagonal_ref = LIST_NOT_USED;
+ C = zeromv;
+ }else{
+ diagonal_ref = PART_NOT_AVAILABLE;
+ C = zeromv;
+ }
}
- pred_motion(h, 0, 4, 0, 0, mx, my);
+ match_count= !diagonal_ref + !top_ref + !left_ref;
+ tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
+ if(match_count > 1){
+ mx = mid_pred(A[0], B[0], C[0]);
+ my = mid_pred(A[1], B[1], C[1]);
+ }else if(match_count==1){
+ if(!left_ref){
+ mx = A[0];
+ my = A[1];
+ }else if(!top_ref){
+ mx = B[0];
+ my = B[1];
+ }else{
+ mx = C[0];
+ my = C[1];
+ }
+ }else{
+ mx = mid_pred(A[0], B[0], C[0]);
+ my = mid_pred(A[1], B[1], C[1]);
+ }
+ fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+ return;
+zeromv:
+ fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
return;
}
static int xvid_ff_2pass_after(struct xvid_context *ref,
xvid_plg_data_t *param) {
char *log = ref->twopassbuffer;
- char *frame_types = " ipbs";
+ const char *frame_types = " ipbs";
char frame_type;
/* Quick bounds check */
*/
av_cold int MPV_common_init(MpegEncContext *s)
{
- int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;
+ int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y,
+ threads = (s->encoding ||
+ (HAVE_THREADS &&
+ s->avctx->active_thread_type & FF_THREAD_SLICE)) ?
+ s->avctx->thread_count : 1;
if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
s->mb_height = (s->height + 31) / 32 * 2;
if((s->encoding || (s->avctx->active_thread_type & FF_THREAD_SLICE)) &&
(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){
- av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
- return -1;
+ /* mb_height can still be 0 here (the condition above explicitly allows for
+ * it); in that case only MAX_THREADS bounds the count, so that the number
+ * of threads is never reduced to 0. */
+ int max_threads = s->mb_height ? FFMIN(MAX_THREADS, s->mb_height) : MAX_THREADS;
+ av_log(s->avctx, AV_LOG_WARNING, "too many threads (%d), reducing to %d\n",
+ s->avctx->thread_count, max_threads);
+ threads = max_threads;
}
if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
s->thread_context[0]= s;
if (s->encoding || (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE)) {
- threads = s->avctx->thread_count;
-
for(i=1; i<threads; i++){
s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
{0x8000000080000000ULL, 0x8000000080000000ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_1 ) = 0x0001000100010001ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1 ) = {0x0001000100010001ULL, 0x0001000100010001ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2 ) = {0x0002000200020002ULL, 0x0002000200020002ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
SECTION .text
+cextern pw_8
cextern pw_4
+cextern pw_2
cextern pw_1
+; dest, left, right, src
+; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
%macro PRED4x4_LOWPASS 4
paddw %2, %3
psrlw %2, 1
dec r2
jg .loop
REP_RET
+
+;-----------------------------------------------------------------------------
+; void pred8x8_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; MOV8 dst, reg0 [, reg1]
+; Stores 16 bytes at [dst]. With 8-byte registers (mmsize==8) this takes two
+; movq stores, reg0 at +0 and reg1 at +8; otherwise a single movdqa of reg0 is
+; emitted and the optional third argument is not used.
+%macro MOV8 2-3
+; sort of a hack, but it works: callers always pass both registers and the
+; wide-register branch simply ignores the second one
+%if mmsize==8
+ movq [%1+0], %2
+ movq [%1+8], %3
+%else
+ movdqa [%1], %2
+%endif
+%endmacro
+
+; PRED8x8_DC cpuname, word-shuffle instruction
+; Emits pred8x8_dc_10_<cpuname>(src=r0, stride=r1). Four partial sums are
+; built: s0/s1 from the two 8-byte halves of the row above the block, s2/s3
+; from the word just left of rows 0-3 and rows 4-7. They are combined as noted
+; inline, and the resulting values are stored to the 8 rows of the block
+; (m1/m2 for rows 0-3, m3/m4 for rows 4-7).
+; NOTE(review): HADDW and SWAP come from the shared x86 macro headers; HADDW is
+; presumably a horizontal word sum using its 2nd argument as scratch - confirm.
+%macro PRED8x8_DC 2
+cglobal pred8x8_dc_10_%1, 2,4
+%ifdef ARCH_X86_64
+%define t0 r10
+%else
+%define t0 r0m
+%endif
+ sub r0, r1 ; r0 -> row above the block
+ pxor m4, m4 ; m4 = 0, second operand of the pavgw below
+ movq m0, [r0+0] ; first 8 bytes of the row above
+ movq m1, [r0+8] ; next 8 bytes of the row above
+ HADDW m0, m2
+ mov t0, r0 ; save the row-above pointer, r0 is advanced below
+ HADDW m1, m2
+
+ movzx r2d, word [r0+r1*1-2] ; word left of row 0
+ movzx r3d, word [r0+r1*2-2] ; word left of row 1
+ lea r0, [r0+r1*2]
+ add r2d, r3d
+ movzx r3d, word [r0+r1*1-2] ; word left of row 2
+ add r2d, r3d
+ movzx r3d, word [r0+r1*2-2] ; word left of row 3
+ add r2d, r3d
+ lea r0, [r0+r1*2]
+ movd m2, r2d ; s2
+
+ movzx r2d, word [r0+r1*1-2] ; word left of row 4
+ movzx r3d, word [r0+r1*2-2] ; word left of row 5
+ lea r0, [r0+r1*2]
+ add r2d, r3d
+ movzx r3d, word [r0+r1*1-2] ; word left of row 6
+ add r2d, r3d
+ movzx r3d, word [r0+r1*2-2] ; word left of row 7
+ add r2d, r3d
+ movd m3, r2d ; s3
+
+ punpcklwd m0, m1
+ mov r0, t0 ; restore the row-above pointer
+ punpcklwd m2, m3
+ punpckldq m0, m2 ; s0, s1, s2, s3
+ %2 m3, m0, 11110110b ; s2, s1, s3, s3
+ lea r2, [r1+r1*2] ; r2 = 3*stride
+ %2 m0, m0, 01110100b ; s0, s1, s3, s1
+ paddw m0, m3
+ lea r3, [r0+r1*4] ; r3 = r0 + 4*stride, base for rows 4-7
+ psrlw m0, 2
+ pavgw m0, m4 ; s0+s2, s1, s3, s1+s3
+%ifidn %1, sse2
+ punpcklwd m0, m0
+ pshufd m3, m0, 11111010b
+ punpckldq m0, m0
+ SWAP 0,1
+%else
+ pshufw m1, m0, 0x00
+ pshufw m2, m0, 0x55
+ pshufw m3, m0, 0xaa
+ pshufw m4, m0, 0xff
+%endif
+ MOV8 r0+r1*1, m1, m2 ; rows 0-3
+ MOV8 r0+r1*2, m1, m2
+ MOV8 r0+r2*1, m1, m2
+ MOV8 r0+r1*4, m1, m2
+ MOV8 r3+r1*1, m3, m4 ; rows 4-7
+ MOV8 r3+r1*2, m3, m4
+ MOV8 r3+r2*1, m3, m4
+ MOV8 r3+r1*4, m3, m4
+ RET
+%endmacro
+
+INIT_MMX
+PRED8x8_DC mmxext, pshufw
+INIT_XMM
+PRED8x8_DC sse2 , pshuflw
+
+;-----------------------------------------------------------------------------
+; void pred8x8_top_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8_TOP_DC cpuname, word-shuffle instruction
+; Emits pred8x8_top_dc_10_<cpuname>(src=r0, stride=r1). Each 8-byte half of the
+; row above the block is summed, rounded as (sum + 2) >> 2 and broadcast; the
+; same 16 bytes are then stored to all 8 rows.
+; NOTE(review): HADDW is defined elsewhere, presumably a horizontal word sum
+; using its 2nd argument as scratch - confirm.
+%macro PRED8x8_TOP_DC 2
+cglobal pred8x8_top_dc_10_%1, 2,4
+ sub r0, r1 ; r0 -> row above the block
+ movq m0, [r0+0] ; first 8 bytes of the row above
+ movq m1, [r0+8] ; next 8 bytes of the row above
+ HADDW m0, m2
+ HADDW m1, m3
+ lea r2, [r1+r1*2] ; r2 = 3*stride
+ paddw m0, [pw_2] ; rounding term for the >> 2 below
+ paddw m1, [pw_2]
+ lea r3, [r0+r1*4] ; r3 = r0 + 4*stride, base for rows 4-7
+ psrlw m0, 2
+ psrlw m1, 2
+ %2 m0, m0, 0 ; broadcast word 0 across the low 8 bytes
+ %2 m1, m1, 0
+%ifidn %1, sse2
+ punpcklqdq m0, m1 ; m0 = low half of m0 followed by low half of m1
+%endif
+ MOV8 r0+r1*1, m0, m1
+ MOV8 r0+r1*2, m0, m1
+ MOV8 r0+r2*1, m0, m1
+ MOV8 r0+r1*4, m0, m1
+ MOV8 r3+r1*1, m0, m1
+ MOV8 r3+r1*2, m0, m1
+ MOV8 r3+r2*1, m0, m1
+ MOV8 r3+r1*4, m0, m1
+ RET
+%endmacro
+
+INIT_MMX
+PRED8x8_TOP_DC mmxext, pshufw
+INIT_XMM
+PRED8x8_TOP_DC sse2 , pshuflw
+
+
+
+;-----------------------------------------------------------------------------
+; void pred8x8l_top_dc(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_TOP_DC cpuname
+; Emits pred8x8l_top_dc_10_<cpuname>(src=r0, has_topleft=r1, has_topright=r2,
+; stride=r3). The row above the block is low-pass filtered with
+; PRED4x4_LOWPASS, the filtered words are summed, rounded as (sum + 4) >> 3,
+; and the result is broadcast to all 8 rows.
+; Only m0-m5 are touched, matching the 6 XMM registers declared to cglobal.
+; NOTE(review): PALIGNR/HADDW/SPLATW are defined elsewhere; the comments below
+; assume palignr-like, horizontal-sum and word-broadcast behaviour - confirm.
+%macro PRED8x8L_TOP_DC 1
+cglobal pred8x8l_top_dc_10_%1, 4,4,6
+ sub r0, r3 ; r0 -> row above the block
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0 ; m2 = top row shifted by one word towards the left neighbour
+ PALIGNR m1, m4, 2, m4 ; m1 = top row shifted by one word towards the right neighbour
+ test r1, r1 ; top_left
+ jz .fix_lt_2
+ test r2, r2 ; top_right
+ jz .fix_tr_1
+ jmp .body
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; top_right
+ jnz .body
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+.body:
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ PRED4x4_LOWPASS m0, m2, m1, m3
+ HADDW m0, m1
+ paddw m0, [pw_4] ; rounding term for the >> 3 below
+ psrlw m0, 3
+ SPLATW m0, m0, 0
+ mova [r0+r3*1], m0
+ mova [r0+r3*2], m0
+ mova [r0+r1*1], m0
+ mova [r0+r3*4], m0
+ mova [r2+r3*1], m0
+ mova [r2+r3*2], m0
+ mova [r2+r1*1], m0
+ mova [r2+r3*4], m0
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_TOP_DC sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_TOP_DC ssse3
+
+;-----------------------------------------------------------------------------
+;void pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+;TODO: see if scalar is faster
+; PRED8x8L_DC cpuname
+; Emits pred8x8l_dc_10_<cpuname>(src=r0, has_topleft=r1, has_topright=r2,
+; stride=r3). The left-neighbour column and the row above are each low-pass
+; filtered with PRED4x4_LOWPASS, both filtered vectors are summed, and
+; (sum_left + sum_top + 8) >> 4 is broadcast to all 8 rows.
+; NOTE(review): PALIGNR/HADDW/SPLATW are defined elsewhere; their exact
+; semantics are assumed from their names - confirm.
+%macro PRED8x8L_DC 1
+cglobal pred8x8l_dc_10_%1, 4,5,8
+ sub r0, r3 ; r0 -> row above the block
+ lea r4, [r0+r3*2]
+ mova m0, [r0+r3*1-16] ; 16 bytes ending just before a row: its highest word is the left neighbour
+ punpckhwd m0, [r0+r3*0-16]
+ mova m1, [r4+r3*1-16]
+ punpckhwd m1, [r0+r3*2-16]
+ mov r4, r0 ; r4 keeps the row-above pointer while r0 walks down
+ punpckhdq m1, m0
+ lea r0, [r0+r3*4]
+ mova m2, [r0+r3*1-16]
+ punpckhwd m2, [r0+r3*0-16]
+ lea r0, [r0+r3*2]
+ mova m3, [r0+r3*1-16]
+ punpckhwd m3, [r0+r3*0-16]
+ punpckhdq m3, m2
+ punpckhqdq m3, m1 ; m3 = gathered left-neighbour words
+ lea r0, [r0+r3*2]
+ mova m0, [r0+r3*0-16]
+ mova m1, [r4]
+ mov r0, r4 ; back to the row above
+ mova m4, m3
+ mova m2, m3
+ PALIGNR m4, m0, 14, m0
+ PALIGNR m1, m2, 2, m2
+ test r1, r1 ; has_topleft
+ jnz .do_left
+.fix_lt_1: ; reached only when has_topleft == 0
+ mova m5, m3
+ pxor m5, m4
+ psrldq m5, 14
+ pslldq m5, 12
+ pxor m1, m5
+ jmp .do_left
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; has_topright
+ jnz .body
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+ jmp .body
+.do_left:
+ mova m0, m4
+ PRED4x4_LOWPASS m2, m1, m4, m3
+ mova m4, m0
+ mova m7, m2
+ PRED4x4_LOWPASS m1, m3, m0, m4
+ pslldq m1, 14
+ PALIGNR m7, m1, 14, m3 ; m7 = filtered left column
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0
+ PALIGNR m1, m4, 2, m4
+ test r1, r1 ; has_topleft
+ jz .fix_lt_2
+ test r2, r2 ; has_topright
+ jz .fix_tr_1
+.body
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ PRED4x4_LOWPASS m6, m2, m1, m3 ; m6 = filtered top row
+ HADDW m7, m0
+ HADDW m6, m0
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ paddw m7, [pw_8] ; rounding term for the >> 4 below
+ paddw m7, m6
+ psrlw m7, 4
+ SPLATW m7, m7
+ mova [r0+r3*1], m7
+ mova [r0+r3*2], m7
+ mova [r0+r1*1], m7
+ mova [r0+r3*4], m7
+ mova [r2+r3*1], m7
+ mova [r2+r3*2], m7
+ mova [r2+r1*1], m7
+ mova [r2+r3*4], m7
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DC sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DC ssse3
+
+;-----------------------------------------------------------------------------
+; void pred8x8l_vertical(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_VERTICAL cpuname
+; Emits pred8x8l_vertical_10_<cpuname>(src=r0, has_topleft=r1, has_topright=r2,
+; stride=r3). The row above the block is low-pass filtered with
+; PRED4x4_LOWPASS and the filtered row is copied to all 8 rows.
+; NOTE(review): PALIGNR is bound by the caller to PALIGNR_MMX/PALIGNR_SSSE3,
+; both defined elsewhere; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_VERTICAL 1
+cglobal pred8x8l_vertical_10_%1, 4,4,6
+ sub r0, r3 ; r0 -> row above the block
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0
+ PALIGNR m1, m4, 2, m4
+ test r1, r1 ; top_left
+ jz .fix_lt_2
+ test r2, r2 ; top_right
+ jz .fix_tr_1
+ jmp .body
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; top_right
+ jnz .body
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+.body
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ PRED4x4_LOWPASS m0, m2, m1, m3 ; m0 = filtered top row
+ mova [r0+r3*1], m0
+ mova [r0+r3*2], m0
+ mova [r0+r1*1], m0
+ mova [r0+r3*4], m0
+ mova [r2+r3*1], m0
+ mova [r2+r3*2], m0
+ mova [r2+r1*1], m0
+ mova [r2+r3*4], m0
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_VERTICAL sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_VERTICAL ssse3
+
+;-----------------------------------------------------------------------------
+; void pred8x8l_horizontal(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_HORIZONTAL cpuname
+; Emits pred8x8l_horizontal_10_<cpuname>(src=r0, has_topleft=r1,
+; has_topright=r2 (overwritten before it is ever read), stride=r3). The
+; left-neighbour column is gathered, low-pass filtered with PRED4x4_LOWPASS,
+; and each filtered word is broadcast across one 16-byte row of the block.
+; NOTE(review): PALIGNR is bound by the caller to a macro defined elsewhere;
+; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_HORIZONTAL 1
+cglobal pred8x8l_horizontal_10_%1, 4,4,8
+ sub r0, r3 ; r0 -> row above the block
+ lea r2, [r0+r3*2]
+ mova m0, [r0+r3*1-16] ; 16 bytes ending just before a row: its highest word is the left neighbour
+ test r1, r1 ; has_topleft (flags survive the lea below)
+ lea r1, [r0+r3]
+ cmovnz r1, r0 ; r1 = has_topleft ? row above : first row
+ punpckhwd m0, [r1+r3*0-16]
+ mova m1, [r2+r3*1-16]
+ punpckhwd m1, [r0+r3*2-16]
+ mov r2, r0 ; r2 keeps the row-above pointer while r0 walks down
+ punpckhdq m1, m0
+ lea r0, [r0+r3*4]
+ mova m2, [r0+r3*1-16]
+ punpckhwd m2, [r0+r3*0-16]
+ lea r0, [r0+r3*2]
+ mova m3, [r0+r3*1-16]
+ punpckhwd m3, [r0+r3*0-16]
+ punpckhdq m3, m2
+ punpckhqdq m3, m1 ; m3 = gathered left-neighbour words
+ lea r0, [r0+r3*2]
+ mova m0, [r0+r3*0-16]
+ mova m1, [r1+r3*0-16]
+ mov r0, r2 ; back to the row above
+ mova m4, m3
+ mova m2, m3
+ PALIGNR m4, m0, 14, m0
+ PALIGNR m1, m2, 2, m2
+ mova m0, m4
+ PRED4x4_LOWPASS m2, m1, m4, m3
+ mova m4, m0
+ mova m7, m2
+ PRED4x4_LOWPASS m1, m3, m0, m4
+ pslldq m1, 14
+ PALIGNR m7, m1, 14, m3 ; m7 = filtered left column
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ punpckhwd m3, m7, m7 ; duplicate each word so pshufd can broadcast it
+ punpcklwd m7, m7
+ pshufd m0, m3, 0xff
+ pshufd m1, m3, 0xaa
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ pshufd m2, m3, 0x55
+ pshufd m3, m3, 0x00
+ pshufd m4, m7, 0xff
+ pshufd m5, m7, 0xaa
+ pshufd m6, m7, 0x55
+ pshufd m7, m7, 0x00
+ mova [r0+r3*1], m0
+ mova [r0+r3*2], m1
+ mova [r0+r1*1], m2
+ mova [r0+r3*4], m3
+ mova [r2+r3*1], m4
+ mova [r2+r3*2], m5
+ mova [r2+r1*1], m6
+ mova [r2+r3*4], m7
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_HORIZONTAL sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_HORIZONTAL ssse3
+
+;-----------------------------------------------------------------------------
+;void pred8x8l_down_left(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_DOWN_LEFT cpuname
+; Emits pred8x8l_down_left_10_<cpuname>(src=r0, has_topleft=r1, has_topright=r2,
+; stride=r3). The row above the block and the 16 bytes following it are
+; low-pass filtered with PRED4x4_LOWPASS (when has_topright is 0 the second
+; vector is instead a broadcast of the last top sample). The two vectors are
+; filtered once more and the rows are written from the last row up to the
+; first, shifting the output window by one word per row.
+; NOTE(review): PALIGNR is bound by the caller to a macro defined elsewhere;
+; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_DOWN_LEFT 1
+cglobal pred8x8l_down_left_10_%1, 4,4,8
+ sub r0, r3 ; r0 -> row above the block
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0
+ PALIGNR m1, m4, 2, m4
+ test r1, r1 ; has_topleft
+ jz .fix_lt_2
+ test r2, r2 ; has_topright
+ jz .fix_tr_1
+ jmp .do_top
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; has_topright
+ jnz .do_top
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+ jmp .do_top
+.fix_tr_2: ; no top-right: m1 = highest word of m3 in every lane
+ punpckhwd m3, m3
+ pshufd m1, m3, 0xFF
+ jmp .do_topright
+.do_top:
+ PRED4x4_LOWPASS m4, m2, m1, m3
+ mova m7, m4 ; m7 = filtered top row
+ test r2, r2 ; has_topright
+ jz .fix_tr_2
+ mova m0, [r0+16]
+ mova m5, m0
+ mova m2, m0
+ mova m4, m0
+ psrldq m5, 14
+ PALIGNR m2, m3, 14, m3
+ PALIGNR m5, m4, 2, m4
+ PRED4x4_LOWPASS m1, m2, m5, m0 ; m1 = filtered top-right row
+.do_topright:
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ mova m6, m1
+ psrldq m1, 14
+ mova m4, m1
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ mova m2, m6
+ PALIGNR m2, m7, 2, m0
+ mova m3, m6
+ PALIGNR m3, m7, 14, m0
+ PALIGNR m4, m6, 2, m0
+ mova m5, m7
+ mova m1, m7
+ mova m7, m6
+ pslldq m1, 2
+ PRED4x4_LOWPASS m0, m1, m2, m5
+ PRED4x4_LOWPASS m1, m3, m4, m7
+ mova [r2+r3*4], m1 ; last row first
+ mova m2, m0
+ pslldq m1, 2 ; shift the row up one word ...
+ psrldq m2, 14 ; ... and pull in the highest word of m0
+ pslldq m0, 2
+ por m1, m2
+ mova [r2+r1*1], m1
+ mova m2, m0
+ pslldq m1, 2
+ psrldq m2, 14
+ pslldq m0, 2
+ por m1, m2
+ mova [r2+r3*2], m1
+ mova m2, m0
+ pslldq m1, 2
+ psrldq m2, 14
+ pslldq m0, 2
+ por m1, m2
+ mova [r2+r3*1], m1
+ mova m2, m0
+ pslldq m1, 2
+ psrldq m2, 14
+ pslldq m0, 2
+ por m1, m2
+ mova [r0+r3*4], m1
+ mova m2, m0
+ pslldq m1, 2
+ psrldq m2, 14
+ pslldq m0, 2
+ por m1, m2
+ mova [r0+r1*1], m1
+ mova m2, m0
+ pslldq m1, 2
+ psrldq m2, 14
+ pslldq m0, 2
+ por m1, m2
+ mova [r0+r3*2], m1
+ pslldq m1, 2
+ psrldq m0, 14
+ por m1, m0
+ mova [r0+r3*1], m1 ; first row last
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DOWN_LEFT sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DOWN_LEFT ssse3
+
+;-----------------------------------------------------------------------------
+;void pred8x8l_down_right(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_DOWN_RIGHT cpuname
+; Emits pred8x8l_down_right_10_<cpuname>(src=r0, has_topleft=r1,
+; has_topright=r2, stride=r3). The left-neighbour column and the row above the
+; block are gathered and low-pass filtered with PRED4x4_LOWPASS, filtered once
+; more in .body, and the rows are written from the last row up to the first,
+; shifting the output window by one word per row.
+; NOTE(review): PALIGNR is bound by the caller to a macro defined elsewhere;
+; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_DOWN_RIGHT 1
+cglobal pred8x8l_down_right_10_%1, 4,5,8
+ sub r0, r3 ; r0 -> row above the block
+ lea r4, [r0+r3*2]
+ mova m0, [r0+r3*1-16] ; 16 bytes ending just before a row: its highest word is the left neighbour
+ punpckhwd m0, [r0+r3*0-16]
+ mova m1, [r4+r3*1-16]
+ punpckhwd m1, [r0+r3*2-16]
+ mov r4, r0 ; r4 keeps the row-above pointer while r0 walks down
+ punpckhdq m1, m0
+ lea r0, [r0+r3*4]
+ mova m2, [r0+r3*1-16]
+ punpckhwd m2, [r0+r3*0-16]
+ lea r0, [r0+r3*2]
+ mova m3, [r0+r3*1-16]
+ punpckhwd m3, [r0+r3*0-16]
+ punpckhdq m3, m2
+ punpckhqdq m3, m1 ; m3 = gathered left-neighbour words
+ lea r0, [r0+r3*2]
+ mova m0, [r0+r3*0-16]
+ mova m1, [r4]
+ mov r0, r4 ; back to the row above
+ mova m4, m3
+ mova m2, m3
+ PALIGNR m4, m0, 14, m0
+ PALIGNR m1, m2, 2, m2
+ test r1, r1 ; top_left
+ jz .fix_lt_1
+.do_left:
+ mova m0, m4
+ PRED4x4_LOWPASS m2, m1, m4, m3
+ mova m4, m0
+ mova m7, m2
+ mova m6, m2
+ PRED4x4_LOWPASS m1, m3, m0, m4
+ pslldq m1, 14
+ PALIGNR m7, m1, 14, m3
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0
+ PALIGNR m1, m4, 2, m4
+ test r1, r1 ; top_left
+ jz .fix_lt_2
+ test r2, r2 ; top_right
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS m4, m2, m1, m3
+ mova m5, m4 ; m5 = filtered top row
+ jmp .body
+.fix_lt_1: ; reached only when has_topleft == 0
+ mova m5, m3
+ pxor m5, m4
+ psrldq m5, 14
+ pslldq m5, 12
+ pxor m1, m5
+ jmp .do_left
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; top_right
+ jnz .do_top
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+ jmp .do_top
+.body
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ mova m1, m7
+ mova m7, m5
+ mova m5, m6
+ mova m2, m7
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ PALIGNR m2, m6, 2, m0
+ mova m3, m7
+ PALIGNR m3, m6, 14, m0
+ mova m4, m7
+ psrldq m4, 2
+ PRED4x4_LOWPASS m0, m1, m2, m5
+ PRED4x4_LOWPASS m1, m3, m4, m7
+ mova [r2+r3*4], m0 ; last row first
+ mova m2, m1
+ psrldq m0, 2 ; shift the row down one word ...
+ pslldq m2, 14 ; ... and pull in the lowest word of m1
+ psrldq m1, 2
+ por m0, m2
+ mova [r2+r1*1], m0
+ mova m2, m1
+ psrldq m0, 2
+ pslldq m2, 14
+ psrldq m1, 2
+ por m0, m2
+ mova [r2+r3*2], m0
+ mova m2, m1
+ psrldq m0, 2
+ pslldq m2, 14
+ psrldq m1, 2
+ por m0, m2
+ mova [r2+r3*1], m0
+ mova m2, m1
+ psrldq m0, 2
+ pslldq m2, 14
+ psrldq m1, 2
+ por m0, m2
+ mova [r0+r3*4], m0
+ mova m2, m1
+ psrldq m0, 2
+ pslldq m2, 14
+ psrldq m1, 2
+ por m0, m2
+ mova [r0+r1*1], m0
+ mova m2, m1
+ psrldq m0, 2
+ pslldq m2, 14
+ psrldq m1, 2
+ por m0, m2
+ mova [r0+r3*2], m0
+ psrldq m0, 2
+ pslldq m1, 14
+ por m0, m1
+ mova [r0+r3*1], m0 ; first row last
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DOWN_RIGHT sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DOWN_RIGHT ssse3
+
+;-----------------------------------------------------------------------------
+; void pred8x8l_vertical_right(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_VERTICAL_RIGHT cpuname
+; Emits pred8x8l_vertical_right_10_<cpuname>(src=r0, has_topleft=r1,
+; has_topright=r2, stride=r3). The left-neighbour column and the row above the
+; block are gathered and low-pass filtered with PRED4x4_LOWPASS. The first row
+; is a pavgw of two shifted top vectors and the second a further low-pass; the
+; remaining rows reuse those two vectors alternately, each time shifting in
+; one more word derived from the left column.
+; NOTE(review): PALIGNR is bound by the caller to a macro defined elsewhere;
+; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_VERTICAL_RIGHT 1
+cglobal pred8x8l_vertical_right_10_%1, 4,5,8
+ sub r0, r3 ; r0 -> row above the block
+ lea r4, [r0+r3*2]
+ mova m0, [r0+r3*1-16] ; 16 bytes ending just before a row: its highest word is the left neighbour
+ punpckhwd m0, [r0+r3*0-16]
+ mova m1, [r4+r3*1-16]
+ punpckhwd m1, [r0+r3*2-16]
+ mov r4, r0 ; r4 keeps the row-above pointer while r0 walks down
+ punpckhdq m1, m0
+ lea r0, [r0+r3*4]
+ mova m2, [r0+r3*1-16]
+ punpckhwd m2, [r0+r3*0-16]
+ lea r0, [r0+r3*2]
+ mova m3, [r0+r3*1-16]
+ punpckhwd m3, [r0+r3*0-16]
+ punpckhdq m3, m2
+ punpckhqdq m3, m1 ; m3 = gathered left-neighbour words
+ lea r0, [r0+r3*2]
+ mova m0, [r0+r3*0-16]
+ mova m1, [r4]
+ mov r0, r4 ; back to the row above
+ mova m4, m3
+ mova m2, m3
+ PALIGNR m4, m0, 14, m0
+ PALIGNR m1, m2, 2, m2
+ test r1, r1 ; has_topleft
+ jz .fix_lt_1
+ jmp .do_left
+.fix_lt_1: ; reached only when has_topleft == 0
+ mova m5, m3
+ pxor m5, m4
+ psrldq m5, 14
+ pslldq m5, 12
+ pxor m1, m5
+ jmp .do_left
+.fix_lt_2: ; no top-left: lowest word of m2 := lowest word of m3
+ mova m5, m3
+ pxor m5, m2
+ pslldq m5, 14
+ psrldq m5, 14
+ pxor m2, m5
+ test r2, r2 ; has_topright
+ jnz .do_top
+.fix_tr_1: ; no top-right: highest word of m1 := highest word of m3
+ mova m5, m3
+ pxor m5, m1
+ psrldq m5, 14
+ pslldq m5, 14
+ pxor m1, m5
+ jmp .do_top
+.do_left:
+ mova m0, m4
+ PRED4x4_LOWPASS m2, m1, m4, m3
+ mova m7, m2 ; m7 = filtered left column
+ mova m0, [r0-16] ; 16 bytes preceding the top row
+ mova m3, [r0] ; top row
+ mova m1, [r0+16] ; 16 bytes following the top row
+ mova m2, m3
+ mova m4, m3
+ PALIGNR m2, m0, 14, m0
+ PALIGNR m1, m4, 2, m4
+ test r1, r1 ; has_topleft
+ jz .fix_lt_2
+ test r2, r2 ; has_topright
+ jz .fix_tr_1
+.do_top
+ PRED4x4_LOWPASS m6, m2, m1, m3 ; m6 = filtered top row
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ mova m2, m6
+ mova m3, m6
+ PALIGNR m3, m7, 14, m0
+ PALIGNR m6, m7, 12, m1
+ mova m4, m3
+ pavgw m3, m2
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ PRED4x4_LOWPASS m0, m6, m2, m4
+ mova [r0+r3*1], m3 ; first row
+ mova [r0+r3*2], m0 ; second row
+ mova m5, m0
+ mova m6, m3
+ mova m1, m7
+ mova m2, m1
+ pslldq m2, 2
+ mova m3, m1
+ pslldq m3, 4
+ PRED4x4_LOWPASS m0, m1, m3, m2
+ PALIGNR m6, m0, 14, m2
+ mova [r0+r1*1], m6
+ pslldq m0, 2
+ PALIGNR m5, m0, 14, m1
+ mova [r0+r3*4], m5
+ pslldq m0, 2
+ PALIGNR m6, m0, 14, m2
+ mova [r2+r3*1], m6
+ pslldq m0, 2
+ PALIGNR m5, m0, 14, m1
+ mova [r2+r3*2], m5
+ pslldq m0, 2
+ PALIGNR m6, m0, 14, m2
+ mova [r2+r1*1], m6
+ pslldq m0, 2
+ PALIGNR m5, m0, 14, m1
+ mova [r2+r3*4], m5
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_VERTICAL_RIGHT sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_VERTICAL_RIGHT ssse3
+
+;-----------------------------------------------------------------------------
+; void pred8x8l_horizontal_up(pixel *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; PRED8x8L_HORIZONTAL_UP cpuname
+; Emits pred8x8l_horizontal_up_10_<cpuname>(src=r0, has_topleft=r1,
+; has_topright=r2 (overwritten before it is ever read), stride=r3). The
+; left-neighbour column is gathered and low-pass filtered with
+; PRED4x4_LOWPASS, reversed into l7..l0 order, and combined with pavgw and a
+; further low-pass into the interleaved p1..p8 pairs from which the 8 rows are
+; assembled.
+; NOTE(review): PALIGNR is bound by the caller to a macro defined elsewhere;
+; palignr-like behaviour is assumed - confirm.
+%macro PRED8x8L_HORIZONTAL_UP 1
+cglobal pred8x8l_horizontal_up_10_%1, 4,4,8
+ sub r0, r3 ; r0 -> row above the block
+ lea r2, [r0+r3*2]
+ mova m0, [r0+r3*1-16] ; 16 bytes ending just before a row: its highest word is the left neighbour
+ test r1, r1 ; has_topleft (flags survive the lea below)
+ lea r1, [r0+r3]
+ cmovnz r1, r0 ; r1 = has_topleft ? row above : first row
+ punpckhwd m0, [r1+r3*0-16]
+ mova m1, [r2+r3*1-16]
+ punpckhwd m1, [r0+r3*2-16]
+ mov r2, r0 ; r2 keeps the row-above pointer while r0 walks down
+ punpckhdq m1, m0
+ lea r0, [r0+r3*4]
+ mova m2, [r0+r3*1-16]
+ punpckhwd m2, [r0+r3*0-16]
+ lea r0, [r0+r3*2]
+ mova m3, [r0+r3*1-16]
+ punpckhwd m3, [r0+r3*0-16]
+ punpckhdq m3, m2
+ punpckhqdq m3, m1 ; m3 = gathered left-neighbour words
+ lea r0, [r0+r3*2]
+ mova m0, [r0+r3*0-16]
+ mova m1, [r1+r3*0-16]
+ mov r0, r2 ; back to the row above
+ mova m4, m3
+ mova m2, m3
+ PALIGNR m4, m0, 14, m0
+ PALIGNR m1, m2, 2, m2
+ mova m0, m4
+ PRED4x4_LOWPASS m2, m1, m4, m3
+ mova m4, m0
+ mova m7, m2
+ PRED4x4_LOWPASS m1, m3, m0, m4
+ pslldq m1, 14
+ PALIGNR m7, m1, 14, m3 ; m7 = filtered left column
+ lea r1, [r3+r3*2] ; r1 = 3*stride
+ pshufd m0, m7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
+ pslldq m7, 14 ; l7 .. .. .. .. .. .. ..
+ mova m2, m0
+ pslld m0, 16
+ psrld m2, 16
+ por m2, m0 ; l7 l6 l5 l4 l3 l2 l1 l0
+ mova m3, m2
+ mova m4, m2
+ mova m5, m2
+ psrldq m2, 2
+ psrldq m3, 4
+ lea r2, [r0+r3*4] ; r2 = r0 + 4*stride, base for rows 4-7
+ por m2, m7 ; l7 l7 l6 l5 l4 l3 l2 l1
+ punpckhwd m7, m7
+ por m3, m7 ; l7 l7 l7 l6 l5 l4 l3 l2
+ pavgw m4, m2
+ PRED4x4_LOWPASS m1, m3, m5, m2
+ mova m5, m4
+ punpcklwd m4, m1 ; p4 p3 p2 p1
+ punpckhwd m5, m1 ; p8 p7 p6 p5
+ mova m6, m5
+ mova m7, m5
+ mova m0, m5
+ PALIGNR m5, m4, 4, m1
+ pshufd m1, m6, 11111001b
+ PALIGNR m6, m4, 8, m2
+ pshufd m2, m7, 11111110b
+ PALIGNR m7, m4, 12, m3
+ pshufd m3, m0, 11111111b
+ mova [r0+r3*1], m4
+ mova [r0+r3*2], m5
+ mova [r0+r1*1], m6
+ mova [r0+r3*4], m7
+ mova [r2+r3*1], m0
+ mova [r2+r3*2], m1
+ mova [r2+r1*1], m2
+ mova [r2+r3*4], m3
+ RET
+%endmacro
+
+INIT_XMM
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_HORIZONTAL_UP sse2
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_HORIZONTAL_UP ssse3
+
+
+
+;-----------------------------------------------------------------------------
+; void pred16x16_vertical(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; MOV16 dst, reg0, reg1 [, reg2, reg3]
+; Stores 32 bytes at [dst]: reg0 at +0 and reg1 at +mmsize. With 8-byte
+; registers (mmsize==8) two more stores, reg2 at +16 and reg3 at +24, complete
+; the 32 bytes; with wider registers the last two arguments are not used.
+%macro MOV16 3-5
+ mova [%1+ 0], %2
+ mova [%1+mmsize], %3
+%if mmsize==8
+ mova [%1+ 16], %4
+ mova [%1+ 24], %5
+%endif
+%endmacro
+
+; PRED16x16_VERTICAL cpuname
+; Emits pred16x16_vertical_10_<cpuname>(src=r0, stride=r1). The 32 bytes of the
+; row above the block are loaded once and copied to each of the 16 rows; the
+; loop runs 8 times and writes two rows per pass.
+%macro PRED16x16_VERTICAL 1
+cglobal pred16x16_vertical_10_%1, 2,3
+ sub r0, r1 ; r0 -> row above the block
+ mov r2, 8 ; 8 passes of 2 rows each
+ mova m0, [r0+ 0]
+ mova m1, [r0+mmsize]
+%if mmsize==8
+ mova m2, [r0+16]
+ mova m3, [r0+24]
+%endif
+.loop:
+ MOV16 r0+r1*1, m0, m1, m2, m3
+ MOV16 r0+r1*2, m0, m1, m2, m3
+ lea r0, [r0+r1*2] ; advance two rows
+ dec r2
+ jg .loop ; leave once the counter reaches 0
+ REP_RET
+%endmacro
+
+INIT_MMX
+PRED16x16_VERTICAL mmxext
+INIT_XMM
+PRED16x16_VERTICAL sse2
+
+;-----------------------------------------------------------------------------
+; void pred16x16_horizontal(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; PRED16x16_HORIZONTAL cpuname
+; Emits pred16x16_horizontal_10_<cpuname>(src=r0, stride=r1). Every row of the
+; block is filled with the word that immediately precedes it in memory (its
+; left neighbour). r2 counts 8 passes of two rows each, 16 rows in total.
+; NOTE(review): SPLATW is defined elsewhere; it is assumed to broadcast the
+; word selected by its 3rd argument - confirm.
+%macro PRED16x16_HORIZONTAL 1
+cglobal pred16x16_horizontal_10_%1, 2,3
+ mov r2, 8 ; 8 passes of 2 rows each
+.vloop:
+ movd m0, [r0+r1*0-4] ; the two words preceding this row
+ movd m1, [r0+r1*1-4] ; the two words preceding the next row
+ SPLATW m0, m0, 1 ; word 1 is the one at byte offset -2
+ SPLATW m1, m1, 1
+ MOV16 r0+r1*0, m0, m0, m0, m0
+ MOV16 r0+r1*1, m1, m1, m1, m1
+ lea r0, [r0+r1*2] ; advance two rows
+ dec r2
+ jg .vloop ; leave at 0; "jge" would run a 9th pass over rows 16-17
+ REP_RET
+%endmacro
+
+INIT_MMX
+PRED16x16_HORIZONTAL mmxext
+INIT_XMM
+PRED16x16_HORIZONTAL sse2
#define PRED8x8(TYPE, DEPTH, OPT) \
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
+PRED8x8(dc, 10, mmxext)
+PRED8x8(dc, 10, sse2)
+PRED8x8(top_dc, 10, mmxext)
+PRED8x8(top_dc, 10, sse2)
PRED8x8(vertical, 10, sse2)
PRED8x8(horizontal, 10, sse2)
+#define PRED8x8L(TYPE, DEPTH, OPT)\
+void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int has_topleft, int has_topright, int stride);
+/* Each PRED8x8L(TYPE, DEPTH, OPT) line below declares ff_pred8x8l_TYPE_DEPTH_OPT. */
+PRED8x8L(dc, 10, sse2)
+PRED8x8L(dc, 10, ssse3)
+PRED8x8L(top_dc, 10, sse2)
+PRED8x8L(top_dc, 10, ssse3)
+PRED8x8L(vertical, 10, sse2)
+PRED8x8L(vertical, 10, ssse3)
+PRED8x8L(horizontal, 10, sse2)
+PRED8x8L(horizontal, 10, ssse3)
+PRED8x8L(down_left, 10, sse2)
+PRED8x8L(down_left, 10, ssse3)
+PRED8x8L(down_right, 10, sse2)
+PRED8x8L(down_right, 10, ssse3)
+PRED8x8L(vertical_right, 10, sse2)
+PRED8x8L(vertical_right, 10, ssse3)
+PRED8x8L(horizontal_up, 10, sse2)
+PRED8x8L(horizontal_up, 10, ssse3)
+
+#define PRED16x16(TYPE, DEPTH, OPT)\
+void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
+/* Each PRED16x16(TYPE, DEPTH, OPT) line below declares ff_pred16x16_TYPE_DEPTH_OPT. */
+PRED16x16(vertical, 10, mmxext)
+PRED16x16(vertical, 10, sse2)
+PRED16x16(horizontal, 10, mmxext)
+PRED16x16(horizontal, 10, sse2)
+
void ff_pred16x16_vertical_mmx (uint8_t *src, int stride);
void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
if (mm_flags & AV_CPU_FLAG_MMX2) {
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
+
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext;
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_mmxext;
+
+ h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
}
if (mm_flags & AV_CPU_FLAG_SSE2) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2;
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2;
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2;
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2;
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2;
+
+ h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2;
+ h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2;
+ h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2;
+ h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2;
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
+ h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2;
+ h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2;
+
+ h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
}
if (mm_flags & AV_CPU_FLAG_SSSE3) {
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
+
+ /* Install every 8x8L predictor that has an SSSE3 prototype, so none of
+  * them is left pointing at the SSE2 version set by the branch before. */
+ h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_ssse3;
+ h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3;
+ h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_ssse3;
+ h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_ssse3;
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
+ h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
+ h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
}
#if HAVE_AVX
if (mm_flags & AV_CPU_FLAG_AVX) {
%endif
%endmacro
+%macro SPLATD 2-3 0 ; (dst, src, dword index = 0): broadcast one dword of src into dst
+%if mmsize == 16
+ pshufd %1, %2, (%3)*0x55 ; 0x55 = 01010101b: the same 2-bit dword index in all four fields
+%else
+ ; pshufw selects 16-bit words, so dword n is words 2n (low) and 2n+1 (high).
+ ; Immediate = 2n*0x11 + (2n+1)*0x44: 0x44 for n=0, 0xEE for n=1.
+ pshufw %1, %2, (%3)*0x22 + ((%3)*2+1)*0x44
+%endif
+%endmacro
+
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
pminsw %1, %3
}
snd_pcm_hw_params_get_buffer_size_max(hw_params, &buffer_size);
+ buffer_size = FFMIN(buffer_size, ALSA_BUFFER_SIZE_MAX);
/* TODO: maybe use ctx->max_picture_buffer somehow */
res = snd_pcm_hw_params_set_buffer_size_near(h, hw_params, &buffer_size);
if (res < 0) {
}
snd_pcm_hw_params_get_period_size_min(hw_params, &period_size, NULL);
+ if (!period_size)
+ period_size = buffer_size / 4;
res = snd_pcm_hw_params_set_period_size_near(h, hw_params, &period_size, NULL);
if (res < 0) {
av_log(ctx, AV_LOG_ERROR, "cannot set ALSA period size (%s)\n",
typedef void (*ff_reorder_func)(const void *, void *, int);
+#define ALSA_BUFFER_SIZE_MAX 32768
+
typedef struct {
AVClass *class;
snd_pcm_t *h;
int map_len;
int len;
AVRational main_timebase = {0, 0};
- struct gxf_stream_info si;
+ struct gxf_stream_info *si = s->priv_data;
int i;
if (!parse_packet_header(pb, &pkt_type, &map_len) || pkt_type != PKT_MAP) {
av_log(s, AV_LOG_ERROR, "map packet not found\n");
return 0;
}
map_len -= len;
- gxf_material_tags(pb, &len, &si);
+ gxf_material_tags(pb, &len, si);
avio_skip(pb, len);
map_len -= 2;
len = avio_rb16(pb); // length of track description
track_id = avio_r8(pb);
track_len = avio_rb16(pb);
len -= track_len;
- gxf_track_tags(pb, &track_len, &si);
+ gxf_track_tags(pb, &track_len, si);
avio_skip(pb, track_len);
if (!(track_type & 0x80)) {
av_log(s, AV_LOG_ERROR, "invalid track type %x\n", track_type);
if (idx < 0) continue;
st = s->streams[idx];
if (!main_timebase.num || !main_timebase.den) {
- main_timebase.num = si.frames_per_second.den;
- main_timebase.den = si.frames_per_second.num * 2;
+ main_timebase.num = si->frames_per_second.den;
+ main_timebase.den = si->frames_per_second.num * 2;
}
- st->start_time = si.first_field;
- if (si.first_field != AV_NOPTS_VALUE && si.last_field != AV_NOPTS_VALUE)
- st->duration = si.last_field - si.first_field;
+ st->start_time = si->first_field;
+ if (si->first_field != AV_NOPTS_VALUE && si->last_field != AV_NOPTS_VALUE)
+ st->duration = si->last_field - si->first_field;
}
if (len < 0)
av_log(s, AV_LOG_ERROR, "invalid track description length specified\n");
AVIOContext *pb = s->pb;
GXFPktType pkt_type;
int pkt_len;
- while (!url_feof(pb)) {
+ struct gxf_stream_info *si = s->priv_data;
+
+ while (!pb->eof_reached) {
AVStream *st;
int track_type, track_id, ret;
int field_nr, field_info, skip = 0;
avio_skip(pb, skip);
pkt->stream_index = stream_index;
pkt->dts = field_nr;
+
+ //set duration manually for DV or else lavf misdetects the frame rate
+ if (st->codec->codec_id == CODEC_ID_DVVIDEO)
+ pkt->duration = si->fields_per_frame;
+
return ret;
}
return AVERROR(EIO);
AVInputFormat ff_gxf_demuxer = {
"gxf",
NULL_IF_CONFIG_SMALL("GXF format"),
- 0,
+ sizeof(struct gxf_stream_info),
gxf_probe,
gxf_header,
gxf_packet,
film->audio_samplerate = AV_RB16(&scratch[24]);
film->audio_channels = scratch[21];
film->audio_bits = scratch[22];
- if (film->audio_bits == 8)
+ if (scratch[23] == 2)
+ film->audio_type = CODEC_ID_ADPCM_ADX;
+ else if (film->audio_bits == 8)
film->audio_type = CODEC_ID_PCM_S8;
else if (film->audio_bits == 16)
film->audio_type = CODEC_ID_PCM_S16BE;
st->codec->codec_id = film->audio_type;
st->codec->codec_tag = 1;
st->codec->channels = film->audio_channels;
- st->codec->bits_per_coded_sample = film->audio_bits;
st->codec->sample_rate = film->audio_samplerate;
+
+ if (film->audio_type == CODEC_ID_ADPCM_ADX) {
+ st->codec->bits_per_coded_sample = 18 * 8 / 32;
+ st->codec->block_align = st->codec->channels * 18;
+ } else {
+ st->codec->bits_per_coded_sample = film->audio_bits;
+ st->codec->block_align = st->codec->channels *
+ st->codec->bits_per_coded_sample / 8;
+ }
+
st->codec->bit_rate = st->codec->channels * st->codec->sample_rate *
st->codec->bits_per_coded_sample;
- st->codec->block_align = st->codec->channels *
- st->codec->bits_per_coded_sample / 8;
}
/* load the sample table */
film->sample_table[i].pts *= film->base_clock;
film->sample_table[i].pts /= film->audio_samplerate;
- audio_frame_counter += (film->sample_table[i].sample_size /
- (film->audio_channels * film->audio_bits / 8));
+ if (film->audio_type == CODEC_ID_ADPCM_ADX)
+ audio_frame_counter += (film->sample_table[i].sample_size * 32 /
+ (18 * film->audio_channels));
+ else
+ audio_frame_counter += (film->sample_table[i].sample_size /
+ (film->audio_channels * film->audio_bits / 8));
} else {
film->sample_table[i].stream = film->video_stream_index;
film->sample_table[i].pts = AV_RB32(&scratch[8]) & 0x7FFFFFFF;
return AVERROR(ENOMEM);
avio_read(pb, pkt->data, sample->sample_size);
} else if ((sample->stream == film->audio_stream_index) &&
- (film->audio_channels == 2)) {
+ (film->audio_channels == 2) &&
+ (film->audio_type != CODEC_ID_ADPCM_ADX)) {
/* stereo PCM needs to be interleaved */
if (av_new_packet(pkt, sample->sample_size))