/**
 * RV30/40 6-tap qpel lowpass interpolation macro.
 *
 * Generates put/avg horizontal and vertical 8- and 16-wide lowpass filters.
 * Each output pixel is filtered as
 *   (p[-2] + p[3] - 5*(p[-1]+p[2]) + p[0]*C1 + p[1]*C2 + (1<<(SHIFT-1))) >> SHIFT
 * where C1/C2 select the quarter-pel phase (52/20 or 20/52, SHIFT 6, or the
 * half-pel 20/20, SHIFT 5 case). OP is op_put or op_avg; cm clips to [0,255].
 */
#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    /* 16-wide = four 8x8 calls; lower half filters w-8 rows */\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}
/**
 * RV40 qpel motion-compensation function generator.
 *
 * Generates the _mcXY_c entry points for one block SIZE (8 or 16).
 * X/Y select the horizontal/vertical quarter-pel phase:
 *   phase 1 -> C1=52, C2=20, SHIFT=6; phase 3 -> C1=20, C2=52, SHIFT=6;
 *   phase 2 -> C1=20, C2=20, SHIFT=5 (half-pel).
 * Two-dimensional cases first filter horizontally into the on-stack
 * buffer `full` (SIZE x (SIZE+5), two extra rows above for the vertical
 * filter's negative taps), then filter vertically into dst.
 */
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}

/* OP implementations for the qpel filters above: the filter value is an
 * index into the clip table cm; avg rounds the average towards +inf. */
#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
#define op_put(a, b) a = cm[b]
/**
 * RV40 chroma motion-compensation generator (4- and 8-wide).
 *
 * Bilinear interpolation with eighth-pel weights A..D derived from (x,y)
 * plus a rounding bias from rv40_bias[y>>1][x>>1]; OP shifts the weighted
 * sum down by 6 (see the chroma op_put/op_avg below).
 * When D == 0 the 2-D filter degenerates to a 1-D one along either the
 * row (step 1) or the column (step stride), saving two multiplies.
 */
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}

/* Chroma OP implementations: weights sum to 64 and the bias already holds
 * the rounding term, so the result is normalized with >>6. */
#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
#define op_put(a, b) a = ((b)>>6)
/**
 * RV40 biweight (B-frame weighted averaging) generator for one block size.
 *
 * rnd variant: each product is pre-shifted by 9 before the final rounded
 * >>5, matching the bitstream's pre-scaled weights.
 * nornd variant: plain (w2*src1 + w1*src2 + 16) >> 5.
 * Note w2 weights src1 and w1 weights src2, as in the original code.
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}
/** dither values for deblocking filter - left/top values */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/** dither values for deblocking filter - right/bottom values */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};
/** Clip a to the symmetric range [-b, b]; arguments parenthesized for macro safety. */
#define CLIP_SYMM(a, b) av_clip((a), -(b), (b))
344 for (i = 0; i < 4; i++, src +=
stride) {
345 int diff_p1p0 = src[-2*
step] - src[-1*
step];
346 int diff_q1q0 = src[ 1*
step] - src[ 0*
step];
347 int diff_p1p2 = src[-2*
step] - src[-3*
step];
348 int diff_q1q2 = src[ 1*
step] - src[ 2*
step];
354 u = (alpha *
FFABS(t)) >> 7;
355 if (u > 3 - (filter_p1 && filter_q1))
359 if (filter_p1 && filter_q1)
362 diff =
CLIP_SYMM((t + 4) >> 3, lim_p0q0);
363 src[-1*
step] = cm[src[-1*
step] + diff];
364 src[ 0*
step] = cm[src[ 0*
step] - diff];
366 if (filter_p1 &&
FFABS(diff_p1p2) <= beta) {
367 t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
371 if (filter_q1 &&
FFABS(diff_q1q2) <= beta) {
372 t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
/** Weak filter for a horizontal edge: pixels across the edge are stride apart. */
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
/** Weak filter for a vertical edge: pixels across the edge are adjacent. */
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
408 for(i = 0; i < 4; i++, src +=
stride){
409 int sflag, p0, q0, p1, q1;
415 sflag = (alpha *
FFABS(t)) >> 7;
419 p0 = (25*src[-3*
step] + 26*src[-2*
step] + 26*src[-1*
step] +
423 q0 = (25*src[-2*
step] + 26*src[-1*
step] + 26*src[ 0*
step] +
425 rv40_dither_r[dmode + i]) >> 7;
428 p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
429 q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
432 p1 = (25*src[-4*
step] + 26*src[-3*
step] + 26*src[-2*
step] + 26*p0 +
434 q1 = (25*src[-1*
step] + 26*q0 + 26*src[ 1*
step] + 26*src[ 2*
step] +
435 25*src[ 3*
step] + rv40_dither_r[dmode + i]) >> 7;
438 p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
439 q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
449 51*src[-3*
step] + 26*src[-4*
step] + 64) >> 7;
451 51*src[ 2*
step] + 26*src[ 3*
step] + 64) >> 7;
/** Strong filter for a horizontal edge: pixels across the edge are stride apart. */
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}
/** Strong filter for a vertical edge: pixels across the edge are adjacent. */
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}
476 int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
477 int strong0 = 0, strong1 = 0;
481 for (i = 0, ptr = src; i < 4; i++, ptr +=
stride) {
482 sum_p1p0 += ptr[-2*
step] - ptr[-1*
step];
483 sum_q1q0 += ptr[ 1*
step] - ptr[ 0*
step];
486 *p1 =
FFABS(sum_p1p0) < (beta << 2);
487 *q1 =
FFABS(sum_q1q0) < (beta << 2);
495 for (i = 0, ptr = src; i < 4; i++, ptr +=
stride) {
496 sum_p1p2 += ptr[-2*
step] - ptr[-3*
step];
497 sum_q1q2 += ptr[ 1*
step] - ptr[ 2*
step];
500 strong0 = *p1 && (
FFABS(sum_p1p2) < beta2);
501 strong1 = *q1 && (
FFABS(sum_q1q2) < beta2);
503 return strong0 && strong1;
/** Filter-strength decision for a horizontal edge. */
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}
/** Filter-strength decision for a vertical edge. */
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
qpel_mc_func put_pixels_tab[4][16]
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
rv40_loop_filter_strength_func rv40_loop_filter_strength[2]
static av_always_inline void rv40_strong_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
#define RV40_LOWPASS(OPNAME, OP)
av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext *dsp)
rv40_weak_loop_filter_func rv40_weak_loop_filter[2]
rv40_weight_func rv40_weight_pixels_tab[2][2]
Biweight functions, first dimension is transform size (16/8), second is whether the weight is prescal...
static av_always_inline int rv40_loop_filter_strength(uint8_t *src, int step, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
qpel_mc_func avg_pixels_tab[4][16]
RV30/40 decoder motion compensation functions.
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
#define RV40_MC(OPNAME, SIZE)
static av_always_inline void rv40_weak_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
static const uint8_t rv40_dither_r[16]
dither values for deblocking filter - right/bottom values
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
av_cold void ff_rv34dsp_init(RV34DSPContext *c, DSPContext *dsp)
void ff_rv40dsp_init_arm(RV34DSPContext *c, DSPContext *dsp)
static const int rv40_bias[4][4]
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
#define RV40_WEIGHT_FUNC(size)
qpel_mc_func put_h264_qpel_pixels_tab[4][16]
static const uint8_t rv40_dither_l[16]
dither values for deblocking filter - left/top values
qpel_mc_func avg_h264_qpel_pixels_tab[4][16]
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
common internal and external API header
rv40_strong_loop_filter_func rv40_strong_loop_filter[2]
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
h264_chroma_mc_func avg_chroma_pixels_tab[3]
h264_chroma_mc_func put_chroma_pixels_tab[3]
#define RV40_CHROMA_MC(OPNAME, OP)
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)