Libav
qpeldsp.c
Go to the documentation of this file.
1 /*
2  * quarterpel DSP functions
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
26 #include <stddef.h>
27 #include <stdint.h>
28 
29 #include "config.h"
30 #include "libavutil/attributes.h"
31 #include "copy_block.h"
32 #include "qpeldsp.h"
33 
34 #define BIT_DEPTH 8
35 #include "hpel_template.c"
36 #include "pel_template.c"
37 #include "qpel_template.c"
38 
/*
 * QPEL_MC(r, OPNAME, RND, OP): generate one full family of C
 * motion-compensation helpers for 8x8 and 16x16 blocks.
 *
 * Macro parameters:
 *   r      - not referenced anywhere in the expansion.
 *   OPNAME - prefix pasted onto every generated function name and onto the
 *            final pixelsN_l2_8 / pixelsN_l4_8 combine call, so it selects
 *            how the final result is stored to dst.
 *   RND    - token pasted between "put" and the helper name for every
 *            intermediate result; intermediates written to the local
 *            scratch buffers therefore always use a "put" variant.
 *   OP     - store macro invoked as OP(dst_lvalue, filtered_sum) by the
 *            lowpass filters. The filters declare a local `cm` pointing at
 *            ff_crop_tab + MAX_NEG_CROP for OP to use.
 *
 * Generated functions:
 *   OPNAME mpeg4_qpel{8,16}_h_lowpass(dst, src, dstStride, srcStride, h)
 *       Filters h rows horizontally. Each row reads N + 1 source samples
 *       (src[0] .. src[N]) and produces N outputs.
 *   OPNAME mpeg4_qpel{8,16}_v_lowpass(dst, src, dstStride, srcStride)
 *       Same filter applied down each of N columns, reading N + 1 rows.
 *
 *       Both use the symmetric 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1)
 *       (see e.g. dst[3]). For outputs near either end, the taps that would
 *       fall outside samples 0..N are folded back onto in-range samples
 *       (e.g. dst[0] uses src[0] twice), so nothing outside 0..N is read.
 *
 *   OPNAME qpelN_mcXY_c(dst, src, stride)   (static)
 *       One function per X, Y in 0..3, except mc00, which is supplied
 *       elsewhere in the file. As far as the bodies show, X drives the
 *       horizontal and Y the vertical processing: a value of 2 stores the
 *       lowpass output directly, while 1 and 3 average the lowpass output
 *       with the unfiltered samples at offset 0 or +1 (one sample or one
 *       row). These presumably correspond to quarter-sample positions -
 *       confirm against the callers.
 *
 *   ff_ OPNAME qpelN_mc{11,31,13,33,12,32}_old_c(dst, src, stride)
 *       Non-static alternatives that compute separate halfH, halfV and
 *       halfHV planes and blend them with pixelsN_l4_8 (or pixelsN_l2_8 for
 *       mc12/mc32), instead of pre-averaging halfH with the source.
 *
 * Scratch buffers:
 *   full   - source copied by copy_block9/copy_block17 with a fixed row
 *            stride of 16 (8x8 case, 9 rows) or 24 (16x16 case, 17 rows).
 *   halfH  - horizontally filtered rows: 8 * 9 = 72 or 16 * 17 = 272 bytes,
 *            one row more than the block so the vertical pass has N + 1
 *            rows of input.
 *   halfV, halfHV, half - N * N outputs (64 or 256 bytes).
 */
39 #define QPEL_MC(r, OPNAME, RND, OP) \
40 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
41  int dstStride, int srcStride, \
42  int h) \
43 { \
44  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
45  int i; \
46  \
47  for (i = 0; i < h; i++) { \
48  OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
49  OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
50  OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
51  OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
52  OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
53  OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
54  OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
55  OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
56  dst += dstStride; \
57  src += srcStride; \
58  } \
59 } \
60  \
61 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, \
62  int dstStride, int srcStride) \
63 { \
64  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
65  const int w = 8; \
66  int i; \
67  \
68  for (i = 0; i < w; i++) { \
69  const int src0 = src[0 * srcStride]; \
70  const int src1 = src[1 * srcStride]; \
71  const int src2 = src[2 * srcStride]; \
72  const int src3 = src[3 * srcStride]; \
73  const int src4 = src[4 * srcStride]; \
74  const int src5 = src[5 * srcStride]; \
75  const int src6 = src[6 * srcStride]; \
76  const int src7 = src[7 * srcStride]; \
77  const int src8 = src[8 * srcStride]; \
78  OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
79  OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
80  OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
81  OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
82  OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
83  OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
84  OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
85  OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
86  dst++; \
87  src++; \
88  } \
89 } \
90  \
91 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, \
92  const uint8_t *src, \
93  int dstStride, int srcStride, \
94  int h) \
95 { \
96  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
97  int i; \
98  \
99  for (i = 0; i < h; i++) { \
100  OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
101  OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
102  OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
103  OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
104  OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
105  OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \
106  OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \
107  OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \
108  OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \
109  OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \
110  OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \
111  OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \
112  OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \
113  OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
114  OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
115  OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
116  dst += dstStride; \
117  src += srcStride; \
118  } \
119 } \
120  \
121 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, \
122  const uint8_t *src, \
123  int dstStride, int srcStride) \
124 { \
125  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
126  const int w = 16; \
127  int i; \
128  \
129  for (i = 0; i < w; i++) { \
130  const int src0 = src[0 * srcStride]; \
131  const int src1 = src[1 * srcStride]; \
132  const int src2 = src[2 * srcStride]; \
133  const int src3 = src[3 * srcStride]; \
134  const int src4 = src[4 * srcStride]; \
135  const int src5 = src[5 * srcStride]; \
136  const int src6 = src[6 * srcStride]; \
137  const int src7 = src[7 * srcStride]; \
138  const int src8 = src[8 * srcStride]; \
139  const int src9 = src[9 * srcStride]; \
140  const int src10 = src[10 * srcStride]; \
141  const int src11 = src[11 * srcStride]; \
142  const int src12 = src[12 * srcStride]; \
143  const int src13 = src[13 * srcStride]; \
144  const int src14 = src[14 * srcStride]; \
145  const int src15 = src[15 * srcStride]; \
146  const int src16 = src[16 * srcStride]; \
147  OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
148  OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
149  OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
150  OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
151  OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
152  OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \
153  OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \
154  OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \
155  OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \
156  OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \
157  OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \
158  OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \
159  OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \
160  OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
161  OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
162  OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
163  dst++; \
164  src++; \
165  } \
166 } \
167  \
168 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, const uint8_t *src, \
169  ptrdiff_t stride) \
170 { \
171  uint8_t half[64]; \
172  \
173  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
174  OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \
175 } \
176  \
177 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, const uint8_t *src, \
178  ptrdiff_t stride) \
179 { \
180  OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \
181 } \
182  \
183 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, const uint8_t *src, \
184  ptrdiff_t stride) \
185 { \
186  uint8_t half[64]; \
187  \
188  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
189  OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \
190 } \
191  \
192 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, const uint8_t *src, \
193  ptrdiff_t stride) \
194 { \
195  uint8_t full[16 * 9]; \
196  uint8_t half[64]; \
197  \
198  copy_block9(full, src, 16, stride, 9); \
199  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
200  OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \
201 } \
202  \
203 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, const uint8_t *src, \
204  ptrdiff_t stride) \
205 { \
206  uint8_t full[16 * 9]; \
207  \
208  copy_block9(full, src, 16, stride, 9); \
209  OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \
210 } \
211  \
212 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, const uint8_t *src, \
213  ptrdiff_t stride) \
214 { \
215  uint8_t full[16 * 9]; \
216  uint8_t half[64]; \
217  \
218  copy_block9(full, src, 16, stride, 9); \
219  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
220  OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \
221 } \
222  \
223 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, const uint8_t *src, \
224  ptrdiff_t stride) \
225 { \
226  uint8_t full[16 * 9]; \
227  uint8_t halfH[72]; \
228  uint8_t halfV[64]; \
229  uint8_t halfHV[64]; \
230  \
231  copy_block9(full, src, 16, stride, 9); \
232  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
233  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
234  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
235  OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \
236  stride, 16, 8, 8, 8, 8); \
237 } \
238  \
239 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, const uint8_t *src, \
240  ptrdiff_t stride) \
241 { \
242  uint8_t full[16 * 9]; \
243  uint8_t halfH[72]; \
244  uint8_t halfHV[64]; \
245  \
246  copy_block9(full, src, 16, stride, 9); \
247  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
248  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
249  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
250  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
251 } \
252  \
253 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, const uint8_t *src, \
254  ptrdiff_t stride) \
255 { \
256  uint8_t full[16 * 9]; \
257  uint8_t halfH[72]; \
258  uint8_t halfV[64]; \
259  uint8_t halfHV[64]; \
260  \
261  copy_block9(full, src, 16, stride, 9); \
262  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
263  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
264  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
265  OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \
266  stride, 16, 8, 8, 8, 8); \
267 } \
268  \
269 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, const uint8_t *src, \
270  ptrdiff_t stride) \
271 { \
272  uint8_t full[16 * 9]; \
273  uint8_t halfH[72]; \
274  uint8_t halfHV[64]; \
275  \
276  copy_block9(full, src, 16, stride, 9); \
277  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
278  put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
279  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
280  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
281 } \
282  \
283 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, const uint8_t *src, \
284  ptrdiff_t stride) \
285 { \
286  uint8_t full[16 * 9]; \
287  uint8_t halfH[72]; \
288  uint8_t halfV[64]; \
289  uint8_t halfHV[64]; \
290  \
291  copy_block9(full, src, 16, stride, 9); \
292  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
293  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
294  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
295  OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \
296  stride, 16, 8, 8, 8, 8); \
297 } \
298  \
299 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, const uint8_t *src, \
300  ptrdiff_t stride) \
301 { \
302  uint8_t full[16 * 9]; \
303  uint8_t halfH[72]; \
304  uint8_t halfHV[64]; \
305  \
306  copy_block9(full, src, 16, stride, 9); \
307  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
308  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
309  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
310  OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
311 } \
312  \
313 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, const uint8_t *src, \
314  ptrdiff_t stride) \
315 { \
316  uint8_t full[16 * 9]; \
317  uint8_t halfH[72]; \
318  uint8_t halfV[64]; \
319  uint8_t halfHV[64]; \
320  \
321  copy_block9(full, src, 16, stride, 9); \
322  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
323  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
324  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
325  OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \
326  stride, 16, 8, 8, 8, 8); \
327 } \
328  \
329 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, const uint8_t *src, \
330  ptrdiff_t stride) \
331 { \
332  uint8_t full[16 * 9]; \
333  uint8_t halfH[72]; \
334  uint8_t halfHV[64]; \
335  \
336  copy_block9(full, src, 16, stride, 9); \
337  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
338  put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
339  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
340  OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
341 } \
342  \
343 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, const uint8_t *src, \
344  ptrdiff_t stride) \
345 { \
346  uint8_t halfH[72]; \
347  uint8_t halfHV[64]; \
348  \
349  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
350  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
351  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
352 } \
353  \
354 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, const uint8_t *src, \
355  ptrdiff_t stride) \
356 { \
357  uint8_t halfH[72]; \
358  uint8_t halfHV[64]; \
359  \
360  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
361  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
362  OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
363 } \
364  \
365 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, const uint8_t *src, \
366  ptrdiff_t stride) \
367 { \
368  uint8_t full[16 * 9]; \
369  uint8_t halfH[72]; \
370  uint8_t halfV[64]; \
371  uint8_t halfHV[64]; \
372  \
373  copy_block9(full, src, 16, stride, 9); \
374  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
375  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
376  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
377  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
378 } \
379  \
380 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, const uint8_t *src, \
381  ptrdiff_t stride) \
382 { \
383  uint8_t full[16 * 9]; \
384  uint8_t halfH[72]; \
385  \
386  copy_block9(full, src, 16, stride, 9); \
387  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
388  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
389  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
390 } \
391  \
392 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, const uint8_t *src, \
393  ptrdiff_t stride) \
394 { \
395  uint8_t full[16 * 9]; \
396  uint8_t halfH[72]; \
397  uint8_t halfV[64]; \
398  uint8_t halfHV[64]; \
399  \
400  copy_block9(full, src, 16, stride, 9); \
401  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
402  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
403  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
404  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
405 } \
406  \
407 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, const uint8_t *src, \
408  ptrdiff_t stride) \
409 { \
410  uint8_t full[16 * 9]; \
411  uint8_t halfH[72]; \
412  \
413  copy_block9(full, src, 16, stride, 9); \
414  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
415  put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
416  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
417 } \
418  \
419 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, const uint8_t *src, \
420  ptrdiff_t stride) \
421 { \
422  uint8_t halfH[72]; \
423  \
424  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
425  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
426 } \
427  \
428 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, const uint8_t *src, \
429  ptrdiff_t stride) \
430 { \
431  uint8_t half[256]; \
432  \
433  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
434  OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \
435 } \
436  \
437 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, const uint8_t *src, \
438  ptrdiff_t stride) \
439 { \
440  OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \
441 } \
442  \
443 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, const uint8_t *src, \
444  ptrdiff_t stride) \
445 { \
446  uint8_t half[256]; \
447  \
448  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
449  OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \
450 } \
451  \
452 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, const uint8_t *src, \
453  ptrdiff_t stride) \
454 { \
455  uint8_t full[24 * 17]; \
456  uint8_t half[256]; \
457  \
458  copy_block17(full, src, 24, stride, 17); \
459  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
460  OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \
461 } \
462  \
463 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, const uint8_t *src, \
464  ptrdiff_t stride) \
465 { \
466  uint8_t full[24 * 17]; \
467  \
468  copy_block17(full, src, 24, stride, 17); \
469  OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \
470 } \
471  \
472 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, const uint8_t *src, \
473  ptrdiff_t stride) \
474 { \
475  uint8_t full[24 * 17]; \
476  uint8_t half[256]; \
477  \
478  copy_block17(full, src, 24, stride, 17); \
479  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
480  OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \
481 } \
482  \
483 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, const uint8_t *src, \
484  ptrdiff_t stride) \
485 { \
486  uint8_t full[24 * 17]; \
487  uint8_t halfH[272]; \
488  uint8_t halfV[256]; \
489  uint8_t halfHV[256]; \
490  \
491  copy_block17(full, src, 24, stride, 17); \
492  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
493  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
494  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
495  OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \
496  stride, 24, 16, 16, 16, 16); \
497 } \
498  \
499 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, const uint8_t *src, \
500  ptrdiff_t stride) \
501 { \
502  uint8_t full[24 * 17]; \
503  uint8_t halfH[272]; \
504  uint8_t halfHV[256]; \
505  \
506  copy_block17(full, src, 24, stride, 17); \
507  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
508  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
509  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
510  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
511 } \
512  \
513 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, const uint8_t *src, \
514  ptrdiff_t stride) \
515 { \
516  uint8_t full[24 * 17]; \
517  uint8_t halfH[272]; \
518  uint8_t halfV[256]; \
519  uint8_t halfHV[256]; \
520  \
521  copy_block17(full, src, 24, stride, 17); \
522  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
523  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
524  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
525  OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \
526  stride, 24, 16, 16, 16, 16); \
527 } \
528  \
529 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, const uint8_t *src, \
530  ptrdiff_t stride) \
531 { \
532  uint8_t full[24 * 17]; \
533  uint8_t halfH[272]; \
534  uint8_t halfHV[256]; \
535  \
536  copy_block17(full, src, 24, stride, 17); \
537  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
538  put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
539  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
540  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
541 } \
542  \
543 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, const uint8_t *src, \
544  ptrdiff_t stride) \
545 { \
546  uint8_t full[24 * 17]; \
547  uint8_t halfH[272]; \
548  uint8_t halfV[256]; \
549  uint8_t halfHV[256]; \
550  \
551  copy_block17(full, src, 24, stride, 17); \
552  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
553  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
554  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
555  OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \
556  stride, 24, 16, 16, 16, 16); \
557 } \
558  \
559 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, const uint8_t *src, \
560  ptrdiff_t stride) \
561 { \
562  uint8_t full[24 * 17]; \
563  uint8_t halfH[272]; \
564  uint8_t halfHV[256]; \
565  \
566  copy_block17(full, src, 24, stride, 17); \
567  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
568  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
569  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
570  OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
571 } \
572  \
573 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, const uint8_t *src, \
574  ptrdiff_t stride) \
575 { \
576  uint8_t full[24 * 17]; \
577  uint8_t halfH[272]; \
578  uint8_t halfV[256]; \
579  uint8_t halfHV[256]; \
580  \
581  copy_block17(full, src, 24, stride, 17); \
582  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
583  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
584  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
585  OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \
586  stride, 24, 16, 16, 16, 16); \
587 } \
588  \
589 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, const uint8_t *src, \
590  ptrdiff_t stride) \
591 { \
592  uint8_t full[24 * 17]; \
593  uint8_t halfH[272]; \
594  uint8_t halfHV[256]; \
595  \
596  copy_block17(full, src, 24, stride, 17); \
597  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
598  put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
599  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
600  OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
601 } \
602  \
603 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, const uint8_t *src, \
604  ptrdiff_t stride) \
605 { \
606  uint8_t halfH[272]; \
607  uint8_t halfHV[256]; \
608  \
609  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
610  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
611  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
612 } \
613  \
614 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, const uint8_t *src, \
615  ptrdiff_t stride) \
616 { \
617  uint8_t halfH[272]; \
618  uint8_t halfHV[256]; \
619  \
620  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
621  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
622  OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
623 } \
624  \
625 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, const uint8_t *src, \
626  ptrdiff_t stride) \
627 { \
628  uint8_t full[24 * 17]; \
629  uint8_t halfH[272]; \
630  uint8_t halfV[256]; \
631  uint8_t halfHV[256]; \
632  \
633  copy_block17(full, src, 24, stride, 17); \
634  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
635  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
636  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
637  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
638 } \
639  \
640 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, const uint8_t *src, \
641  ptrdiff_t stride) \
642 { \
643  uint8_t full[24 * 17]; \
644  uint8_t halfH[272]; \
645  \
646  copy_block17(full, src, 24, stride, 17); \
647  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
648  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
649  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
650 } \
651  \
652 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, const uint8_t *src, \
653  ptrdiff_t stride) \
654 { \
655  uint8_t full[24 * 17]; \
656  uint8_t halfH[272]; \
657  uint8_t halfV[256]; \
658  uint8_t halfHV[256]; \
659  \
660  copy_block17(full, src, 24, stride, 17); \
661  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
662  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
663  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
664  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
665 } \
666  \
667 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, const uint8_t *src, \
668  ptrdiff_t stride) \
669 { \
670  uint8_t full[24 * 17]; \
671  uint8_t halfH[272]; \
672  \
673  copy_block17(full, src, 24, stride, 17); \
674  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
675  put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
676  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
677 } \
678  \
679 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, const uint8_t *src, \
680  ptrdiff_t stride) \
681 { \
682  uint8_t halfH[272]; \
683  \
684  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
685  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
686 }
687 
/*
 * Store operations passed to QPEL_MC as its OP argument.
 *
 * `a` is the destination lvalue and `b` the raw filter sum. The lowpass
 * kernel weights add up to 2 * (20 - 6 + 3 - 1) = 32, so `>> 5` brings the
 * sum back to sample scale. The result then indexes `cm`, which must be in
 * scope at the expansion site (each lowpass function declares it as
 * ff_crop_tab + MAX_NEG_CROP); presumably this clamps to the valid 8-bit
 * range - confirm against the table definition.
 *
 *   op_put        - adds 16 before the shift (round to nearest).
 *   op_put_no_rnd - adds 15 instead of 16, so exact halves round down.
 *   op_avg        - computes the op_put value, then averages it with the
 *                   value already in `a`, rounding up: (a + x + 1) >> 1.
 *
 * `a` is expanded unparenthesized on the left of the assignment, and more
 * than once in op_avg, so it must be a plain lvalue without side effects.
 *
 * The three QPEL_MC expansions below generate the put_, put_no_rnd_ and
 * avg_ function families. avg_ passes `_` as RND, so its intermediate
 * buffers are produced by the put_ helpers. The store macros are
 * undefined again once the families have been generated.
 */
688 #define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
689 #define op_put(a, b) a = cm[((b) + 16) >> 5]
690 #define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
691 
692 QPEL_MC(0, put_, _, op_put)
693 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
694 QPEL_MC(0, avg_, _, op_avg)
695 
696 #undef op_avg
697 #undef op_put
698 #undef op_put_no_rnd
699 
/**
 * Fixed-size 8x8 entry point for the "put" pixel helper.
 *
 * Forwards to put_pixels8_8_c() with the row count pinned to 8, giving the
 * helper the three-argument (dst, src, stride) shape used by the qpel
 * function tables.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride stride passed through unchanged to put_pixels8_8_c()
 */
void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int rows = 8;

    put_pixels8_8_c(dst, src, stride, rows);
}
704 
/**
 * Fixed-size 8x8 entry point for the "avg" pixel helper.
 *
 * Forwards to avg_pixels8_8_c() with the row count pinned to 8, giving the
 * helper the three-argument (dst, src, stride) shape used by the qpel
 * function tables.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride stride passed through unchanged to avg_pixels8_8_c()
 */
void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int rows = 8;

    avg_pixels8_8_c(dst, src, stride, rows);
}
709 
/**
 * Fixed-size 16x16 entry point for the "put" pixel helper.
 *
 * Forwards to put_pixels16_8_c() with the row count pinned to 16, giving
 * the helper the three-argument (dst, src, stride) shape used by the qpel
 * function tables.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride stride passed through unchanged to put_pixels16_8_c()
 */
void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int rows = 16;

    put_pixels16_8_c(dst, src, stride, rows);
}
714 
/**
 * Fixed-size 16x16 entry point for the "avg" pixel helper.
 *
 * Forwards to avg_pixels16_8_c() with the row count pinned to 16, giving
 * the helper the three-argument (dst, src, stride) shape used by the qpel
 * function tables.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride stride passed through unchanged to avg_pixels16_8_c()
 */
void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int rows = 16;

    avg_pixels16_8_c(dst, src, stride, rows);
}
719 
/*
 * QPEL_MC does not generate an mc00 variant, so the mc00 names used by the
 * dspfunc() table setup are aliased to the fixed-size block wrappers
 * above. Both put_no_rnd_ aliases map to the same functions as the put_
 * aliases.
 */
720 #define put_qpel8_mc00_c ff_put_pixels8x8_c
721 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
722 #define put_qpel16_mc00_c ff_put_pixels16x16_c
723 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
724 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
725 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
726 
/**
 * Externally visible wrapper around put_pixels8_l2_8().
 *
 * Every argument is forwarded unchanged; the wrapper only gives the helper
 * a non-static, ff_-prefixed name.
 *
 * @param dst         destination
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  stride for dst
 * @param src_stride1 stride for src1
 * @param src_stride2 stride for src2
 * @param h           row count handed to put_pixels8_l2_8()
 */
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         int dst_stride, int src_stride1, int src_stride2,
                         int h)
{
    put_pixels8_l2_8(dst, src1, src2,
                     dst_stride, src_stride1, src_stride2,
                     h);
}
734 
736 {
737 #define dspfunc(PFX, IDX, NUM) \
738  c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \
739  c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \
740  c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \
741  c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \
742  c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \
743  c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \
744  c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \
745  c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \
746  c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \
747  c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \
748  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
749  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
750  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
751  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
752  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
753  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
754 
755  dspfunc(put_qpel, 0, 16);
756  dspfunc(put_qpel, 1, 8);
757 
758  dspfunc(put_no_rnd_qpel, 0, 16);
759  dspfunc(put_no_rnd_qpel, 1, 8);
760 
761  dspfunc(avg_qpel, 0, 16);
762  dspfunc(avg_qpel, 1, 8);
763 
764  if (ARCH_X86)
766 }
#define ARCH_X86
Definition: config.h:33
int stride
Definition: mace.c:144
void ff_qpeldsp_init_x86(QpelDSPContext *c)
Definition: qpeldsp_init.c:527
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: qpeldsp.c:727
Macro definitions for various function/variable attributes.
#define _(x)
uint8_t
#define av_cold
Definition: attributes.h:66
#define op_put_no_rnd(a, b)
Definition: qpeldsp.c:690
quarterpel DSP functions
void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:700
#define QPEL_MC(r, OPNAME, RND, OP)
Definition: qpeldsp.c:39
quarterpel DSP function templates
void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:715
void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:710
#define op_put(a, b)
Definition: qpeldsp.c:689
#define dspfunc(PFX, IDX, NUM)
quarterpel DSP context
Definition: qpeldsp.h:72
#define op_avg(a, b)
Definition: qpeldsp.c:688
av_cold void ff_qpeldsp_init(QpelDSPContext *c)
Definition: qpeldsp.c:735
void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:705