Libav
dsputil.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
30 #include "libavutil/attributes.h"
31 #include "libavutil/imgutils.h"
32 #include "libavutil/internal.h"
33 #include "avcodec.h"
34 #include "copy_block.h"
35 #include "dct.h"
36 #include "dsputil.h"
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
40 #include "imgconvert.h"
41 #include "mathops.h"
42 #include "mpegvideo.h"
43 #include "config.h"
44 
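/*
 * Table of squares, indexed with an offset of 256 so that a signed pixel
 * difference in [-255, 255] can be used directly as an index (see the
 * sse*_c and pix_norm1_c functions below).  It is only zero-initialized
 * here; the real values are filled in at init time, outside the part of the
 * file shown above.  A minimal sketch of that initialization:
 */
#if 0
    int i;
    for (i = 0; i < 512; i++)
        ff_squareTbl[i] = (i - 256) * (i - 256);
#endif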
45 uint32_t ff_squareTbl[512] = {0, };
46 
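/*
 * dsputil_template.c is included once per bit depth to generate the
 * bit-depth-specific pixel helpers (put_pixels*_*_c, avg_pixels*_*_c, ...);
 * the 8-bit variants (suffix _8) are the ones called by the C code below.
 */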
47 #define BIT_DEPTH 16
48 #include "dsputil_template.c"
49 #undef BIT_DEPTH
50 
51 #define BIT_DEPTH 8
52 #include "dsputil_template.c"
53 
54 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
55 #define pb_7f (~0UL/255 * 0x7f)
56 #define pb_80 (~0UL/255 * 0x80)
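/*
 * ~0UL/255 is 0x01 repeated in every byte of an unsigned long, so pb_7f and
 * pb_80 replicate the bytes 0x7f and 0x80 across a whole native word.  They
 * are used further down in the file for SIMD-within-a-register byte
 * arithmetic; for example, a per-byte addition with no carry leaking
 * between bytes can be written as
 *     sum = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
 * (illustrative identity only, not a function defined here).
 */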
57 
58 /* Specific zigzag scan for 248 idct. NOTE that unlike the
59  specification, we interleave the fields */
60 const uint8_t ff_zigzag248_direct[64] = {
61  0, 8, 1, 9, 16, 24, 2, 10,
62  17, 25, 32, 40, 48, 56, 33, 41,
63  18, 26, 3, 11, 4, 12, 19, 27,
64  34, 42, 49, 57, 50, 58, 35, 43,
65  20, 28, 5, 13, 6, 14, 21, 29,
66  36, 44, 51, 59, 52, 60, 37, 45,
67  22, 30, 7, 15, 23, 31, 38, 46,
68  53, 61, 54, 62, 39, 47, 55, 63,
69 };
70 
71 const uint8_t ff_alternate_horizontal_scan[64] = {
72  0, 1, 2, 3, 8, 9, 16, 17,
73  10, 11, 4, 5, 6, 7, 15, 14,
74  13, 12, 19, 18, 24, 25, 32, 33,
75  26, 27, 20, 21, 22, 23, 28, 29,
76  30, 31, 34, 35, 40, 41, 48, 49,
77  42, 43, 36, 37, 38, 39, 44, 45,
78  46, 47, 50, 51, 56, 57, 58, 59,
79  52, 53, 54, 55, 60, 61, 62, 63,
80 };
81 
82 const uint8_t ff_alternate_vertical_scan[64] = {
83  0, 8, 16, 24, 1, 9, 2, 10,
84  17, 25, 32, 40, 48, 56, 57, 49,
85  41, 33, 26, 18, 3, 11, 4, 12,
86  19, 27, 34, 42, 50, 58, 35, 43,
87  51, 59, 20, 28, 5, 13, 6, 14,
88  21, 29, 36, 44, 52, 60, 37, 45,
89  53, 61, 22, 30, 7, 15, 23, 31,
90  38, 46, 54, 62, 39, 47, 55, 63,
91 };
92 
93 /* Input permutation for the simple_idct_mmx */
94 static const uint8_t simple_mmx_permutation[64]={
95  0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
96  0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
97  0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
98  0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
99  0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
100  0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
101  0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
102  0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
103 };
104 
105 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
106 
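/*
 * Build the scan tables for one scan order: permutated[] is the scan order
 * with the IDCT's input permutation applied, and raster_end[i] is the
 * highest permutated index seen so far, i.e. how far into the block the
 * first i+1 scanned coefficients can reach.
 */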
107 av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
108  const uint8_t *src_scantable)
109 {
110  int i;
111  int end;
112 
113  st->scantable= src_scantable;
114 
115  for(i=0; i<64; i++){
116  int j;
117  j = src_scantable[i];
118  st->permutated[i] = permutation[j];
119  }
120 
121  end=-1;
122  for(i=0; i<64; i++){
123  int j;
124  j = st->permutated[i];
125  if(j>end) end=j;
126  st->raster_end[i]= end;
127  }
128 }
129 
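/*
 * Fill a 64-entry permutation mapping natural (raster) coefficient order to
 * the input order expected by the selected IDCT implementation; each case
 * below encodes one implementation's layout.
 */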
130 av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
131  int idct_permutation_type)
132 {
133  int i;
134 
135  switch(idct_permutation_type){
136  case FF_NO_IDCT_PERM:
137  for(i=0; i<64; i++)
138  idct_permutation[i]= i;
139  break;
140  case FF_LIBMPEG2_IDCT_PERM:
141  for(i=0; i<64; i++)
142  idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
143  break;
144  case FF_SIMPLE_IDCT_PERM:
145  for(i=0; i<64; i++)
146  idct_permutation[i]= simple_mmx_permutation[i];
147  break;
148  case FF_TRANSPOSE_IDCT_PERM:
149  for(i=0; i<64; i++)
150  idct_permutation[i]= ((i&7)<<3) | (i>>3);
151  break;
152  case FF_PARTTRANS_IDCT_PERM:
153  for(i=0; i<64; i++)
154  idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
155  break;
156  case FF_SSE2_IDCT_PERM:
157  for(i=0; i<64; i++)
158  idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
159  break;
160  default:
161  av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
162  }
163 }
164 
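/* Sum of all 256 pixels of a 16x16 block; line_size is the row stride. */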
165 static int pix_sum_c(uint8_t * pix, int line_size)
166 {
167  int s, i, j;
168 
169  s = 0;
170  for (i = 0; i < 16; i++) {
171  for (j = 0; j < 16; j += 8) {
172  s += pix[0];
173  s += pix[1];
174  s += pix[2];
175  s += pix[3];
176  s += pix[4];
177  s += pix[5];
178  s += pix[6];
179  s += pix[7];
180  pix += 8;
181  }
182  pix += line_size - 16;
183  }
184  return s;
185 }
186 
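/*
 * Sum of squares of a 16x16 block, using the square table so no multiplies
 * are needed.  The word-at-a-time paths read 8 (or 2x4) pixels per load
 * through a pointer cast, which relies on the platform tolerating such
 * accesses for these buffers.
 */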
187 static int pix_norm1_c(uint8_t * pix, int line_size)
188 {
189  int s, i, j;
190  uint32_t *sq = ff_squareTbl + 256;
191 
192  s = 0;
193  for (i = 0; i < 16; i++) {
194  for (j = 0; j < 16; j += 8) {
195 #if 0
196  s += sq[pix[0]];
197  s += sq[pix[1]];
198  s += sq[pix[2]];
199  s += sq[pix[3]];
200  s += sq[pix[4]];
201  s += sq[pix[5]];
202  s += sq[pix[6]];
203  s += sq[pix[7]];
204 #else
205 #if HAVE_FAST_64BIT
206  register uint64_t x=*(uint64_t*)pix;
207  s += sq[x&0xff];
208  s += sq[(x>>8)&0xff];
209  s += sq[(x>>16)&0xff];
210  s += sq[(x>>24)&0xff];
211  s += sq[(x>>32)&0xff];
212  s += sq[(x>>40)&0xff];
213  s += sq[(x>>48)&0xff];
214  s += sq[(x>>56)&0xff];
215 #else
216  register uint32_t x=*(uint32_t*)pix;
217  s += sq[x&0xff];
218  s += sq[(x>>8)&0xff];
219  s += sq[(x>>16)&0xff];
220  s += sq[(x>>24)&0xff];
221  x=*(uint32_t*)(pix+4);
222  s += sq[x&0xff];
223  s += sq[(x>>8)&0xff];
224  s += sq[(x>>16)&0xff];
225  s += sq[(x>>24)&0xff];
226 #endif
227 #endif
228  pix += 8;
229  }
230  pix += line_size - 16;
231  }
232  return s;
233 }
234 
235 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
236  int i;
237 
238  for(i=0; i+8<=w; i+=8){
239  dst[i+0]= av_bswap32(src[i+0]);
240  dst[i+1]= av_bswap32(src[i+1]);
241  dst[i+2]= av_bswap32(src[i+2]);
242  dst[i+3]= av_bswap32(src[i+3]);
243  dst[i+4]= av_bswap32(src[i+4]);
244  dst[i+5]= av_bswap32(src[i+5]);
245  dst[i+6]= av_bswap32(src[i+6]);
246  dst[i+7]= av_bswap32(src[i+7]);
247  }
248  for(;i<w; i++){
249  dst[i+0]= av_bswap32(src[i+0]);
250  }
251 }
252 
253 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
254 {
255  while (len--)
256  *dst++ = av_bswap16(*src++);
257 }
258 
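/*
 * Sum of squared errors between two blocks over h rows; the variants below
 * differ only in block width (4, 8, 16).  ff_squareTbl + 256 turns a signed
 * difference into a valid index, so sq[d] == d*d.  A generic sketch of the
 * same computation (the real functions are unrolled by hand):
 */
#if 0
static int sse_c(uint8_t *pix1, uint8_t *pix2, int line_size, int w, int h)
{
    uint32_t *sq = ff_squareTbl + 256;
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++)
            s += sq[pix1[j] - pix2[j]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
#endif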
259 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
260 {
261  int s, i;
262  uint32_t *sq = ff_squareTbl + 256;
263 
264  s = 0;
265  for (i = 0; i < h; i++) {
266  s += sq[pix1[0] - pix2[0]];
267  s += sq[pix1[1] - pix2[1]];
268  s += sq[pix1[2] - pix2[2]];
269  s += sq[pix1[3] - pix2[3]];
270  pix1 += line_size;
271  pix2 += line_size;
272  }
273  return s;
274 }
275 
276 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
277 {
278  int s, i;
279  uint32_t *sq = ff_squareTbl + 256;
280 
281  s = 0;
282  for (i = 0; i < h; i++) {
283  s += sq[pix1[0] - pix2[0]];
284  s += sq[pix1[1] - pix2[1]];
285  s += sq[pix1[2] - pix2[2]];
286  s += sq[pix1[3] - pix2[3]];
287  s += sq[pix1[4] - pix2[4]];
288  s += sq[pix1[5] - pix2[5]];
289  s += sq[pix1[6] - pix2[6]];
290  s += sq[pix1[7] - pix2[7]];
291  pix1 += line_size;
292  pix2 += line_size;
293  }
294  return s;
295 }
296 
297 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
298 {
299  int s, i;
300  uint32_t *sq = ff_squareTbl + 256;
301 
302  s = 0;
303  for (i = 0; i < h; i++) {
304  s += sq[pix1[ 0] - pix2[ 0]];
305  s += sq[pix1[ 1] - pix2[ 1]];
306  s += sq[pix1[ 2] - pix2[ 2]];
307  s += sq[pix1[ 3] - pix2[ 3]];
308  s += sq[pix1[ 4] - pix2[ 4]];
309  s += sq[pix1[ 5] - pix2[ 5]];
310  s += sq[pix1[ 6] - pix2[ 6]];
311  s += sq[pix1[ 7] - pix2[ 7]];
312  s += sq[pix1[ 8] - pix2[ 8]];
313  s += sq[pix1[ 9] - pix2[ 9]];
314  s += sq[pix1[10] - pix2[10]];
315  s += sq[pix1[11] - pix2[11]];
316  s += sq[pix1[12] - pix2[12]];
317  s += sq[pix1[13] - pix2[13]];
318  s += sq[pix1[14] - pix2[14]];
319  s += sq[pix1[15] - pix2[15]];
320 
321  pix1 += line_size;
322  pix2 += line_size;
323  }
324  return s;
325 }
326 
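/* Store the difference of two 8x8 pixel blocks into an 8x8 block of int16_t
 * coefficients, as input for a forward DCT. */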
327 static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
328  const uint8_t *s2, int stride){
329  int i;
330 
331  /* read the pixels */
332  for(i=0;i<8;i++) {
333  block[0] = s1[0] - s2[0];
334  block[1] = s1[1] - s2[1];
335  block[2] = s1[2] - s2[2];
336  block[3] = s1[3] - s2[3];
337  block[4] = s1[4] - s2[4];
338  block[5] = s1[5] - s2[5];
339  block[6] = s1[6] - s2[6];
340  block[7] = s1[7] - s2[7];
341  s1 += stride;
342  s2 += stride;
343  block += 8;
344  }
345 }
346 
347 
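/*
 * The *_pixels_clamped_c functions below write an 8x8 block of int16_t
 * values (typically IDCT output) back to 8-bit pixels: "put" clamps to
 * [0,255], "put_signed" additionally adds a +128 bias, and "add" adds the
 * block to the existing prediction before clamping.
 */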
348 static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
349  int line_size)
350 {
351  int i;
352 
353  /* read the pixels */
354  for(i=0;i<8;i++) {
355  pixels[0] = av_clip_uint8(block[0]);
356  pixels[1] = av_clip_uint8(block[1]);
357  pixels[2] = av_clip_uint8(block[2]);
358  pixels[3] = av_clip_uint8(block[3]);
359  pixels[4] = av_clip_uint8(block[4]);
360  pixels[5] = av_clip_uint8(block[5]);
361  pixels[6] = av_clip_uint8(block[6]);
362  pixels[7] = av_clip_uint8(block[7]);
363 
364  pixels += line_size;
365  block += 8;
366  }
367 }
368 
369 static void put_signed_pixels_clamped_c(const int16_t *block,
370  uint8_t *restrict pixels,
371  int line_size)
372 {
373  int i, j;
374 
375  for (i = 0; i < 8; i++) {
376  for (j = 0; j < 8; j++) {
377  if (*block < -128)
378  *pixels = 0;
379  else if (*block > 127)
380  *pixels = 255;
381  else
382  *pixels = (uint8_t)(*block + 128);
383  block++;
384  pixels++;
385  }
386  pixels += (line_size - 8);
387  }
388 }
389 
390 static void add_pixels8_c(uint8_t *restrict pixels,
391  int16_t *block,
392  int line_size)
393 {
394  int i;
395 
396  for(i=0;i<8;i++) {
397  pixels[0] += block[0];
398  pixels[1] += block[1];
399  pixels[2] += block[2];
400  pixels[3] += block[3];
401  pixels[4] += block[4];
402  pixels[5] += block[5];
403  pixels[6] += block[6];
404  pixels[7] += block[7];
405  pixels += line_size;
406  block += 8;
407  }
408 }
409 
410 static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
411  int line_size)
412 {
413  int i;
414 
415  /* read the pixels */
416  for(i=0;i<8;i++) {
417  pixels[0] = av_clip_uint8(pixels[0] + block[0]);
418  pixels[1] = av_clip_uint8(pixels[1] + block[1]);
419  pixels[2] = av_clip_uint8(pixels[2] + block[2]);
420  pixels[3] = av_clip_uint8(pixels[3] + block[3]);
421  pixels[4] = av_clip_uint8(pixels[4] + block[4]);
422  pixels[5] = av_clip_uint8(pixels[5] + block[5]);
423  pixels[6] = av_clip_uint8(pixels[6] + block[6]);
424  pixels[7] = av_clip_uint8(pixels[7] + block[7]);
425  pixels += line_size;
426  block += 8;
427  }
428 }
429 
430 static int sum_abs_dctelem_c(int16_t *block)
431 {
432  int sum=0, i;
433  for(i=0; i<64; i++)
434  sum+= FFABS(block[i]);
435  return sum;
436 }
437 
438 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
439 {
440  int i;
441 
442  for (i = 0; i < h; i++) {
443  memset(block, value, 16);
444  block += line_size;
445  }
446 }
447 
448 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
449 {
450  int i;
451 
452  for (i = 0; i < h; i++) {
453  memset(block, value, 8);
454  block += line_size;
455  }
456 }
457 
458 #define avg2(a,b) ((a+b+1)>>1)
459 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
460 
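/*
 * 1/16-pel bilinear interpolation for GMC with a single motion vector: the
 * four weights A..D sum to 16*16 = 256, so the >>8 normalizes the result
 * (e.g. x16 = y16 = 8 gives A = B = C = D = 64, a plain average of the four
 * neighbouring samples).
 */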
461 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
462 {
463  const int A=(16-x16)*(16-y16);
464  const int B=( x16)*(16-y16);
465  const int C=(16-x16)*( y16);
466  const int D=( x16)*( y16);
467  int i;
468 
469  for(i=0; i<h; i++)
470  {
471  dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
472  dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
473  dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
474  dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
475  dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
476  dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
477  dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
478  dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
479  dst+= stride;
480  src+= stride;
481  }
482 }
483 
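/*
 * General global motion compensation: (ox, oy) is the fixed-point source
 * position of the block's top-left sample (16 + shift fractional bits); it
 * is advanced by (dxx, dyx) per output column and (dxy, dyy) per output
 * row, with bilinear interpolation of the fractional part and clipping of
 * the integer part at the picture edges.
 */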
484 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
485  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
486 {
487  int y, vx, vy;
488  const int s= 1<<shift;
489 
490  width--;
491  height--;
492 
493  for(y=0; y<h; y++){
494  int x;
495 
496  vx= ox;
497  vy= oy;
498  for(x=0; x<8; x++){ //XXX FIXME optimize
499  int src_x, src_y, frac_x, frac_y, index;
500 
501  src_x= vx>>16;
502  src_y= vy>>16;
503  frac_x= src_x&(s-1);
504  frac_y= src_y&(s-1);
505  src_x>>=shift;
506  src_y>>=shift;
507 
508  if((unsigned)src_x < width){
509  if((unsigned)src_y < height){
510  index= src_x + src_y*stride;
511  dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
512  + src[index +1]* frac_x )*(s-frac_y)
513  + ( src[index+stride ]*(s-frac_x)
514  + src[index+stride+1]* frac_x )* frac_y
515  + r)>>(shift*2);
516  }else{
517  index= src_x + av_clip(src_y, 0, height)*stride;
518  dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
519  + src[index +1]* frac_x )*s
520  + r)>>(shift*2);
521  }
522  }else{
523  if((unsigned)src_y < height){
524  index= av_clip(src_x, 0, width) + src_y*stride;
525  dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
526  + src[index+stride ]* frac_y )*s
527  + r)>>(shift*2);
528  }else{
529  index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
530  dst[y*stride + x]= src[index ];
531  }
532  }
533 
534  vx+= dxx;
535  vy+= dyx;
536  }
537  ox += dxy;
538  oy += dyy;
539  }
540 }
541 
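/*
 * Third-pel interpolation (used by the SVQ3 decoder).  The constants are
 * fixed-point approximations of exact fractions, 683/2048 ~= 1/3 and
 * 2731/32768 ~= 1/12, so e.g. mc10 computes roughly (2*a + b)/3 and mc11
 * roughly (4*a + 3*b + 3*c + 2*d)/12.  The mcXY suffix gives the horizontal
 * (X) and vertical (Y) third-pel offset; mc00 is a plain copy or average.
 */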
542 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
543  switch(width){
544  case 2: put_pixels2_8_c (dst, src, stride, height); break;
545  case 4: put_pixels4_8_c (dst, src, stride, height); break;
546  case 8: put_pixels8_8_c (dst, src, stride, height); break;
547  case 16:put_pixels16_8_c(dst, src, stride, height); break;
548  }
549 }
550 
551 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
552  int i,j;
553  for (i=0; i < height; i++) {
554  for (j=0; j < width; j++) {
555  dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
556  }
557  src += stride;
558  dst += stride;
559  }
560 }
561 
562 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
563  int i,j;
564  for (i=0; i < height; i++) {
565  for (j=0; j < width; j++) {
566  dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
567  }
568  src += stride;
569  dst += stride;
570  }
571 }
572 
573 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
574  int i,j;
575  for (i=0; i < height; i++) {
576  for (j=0; j < width; j++) {
577  dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
578  }
579  src += stride;
580  dst += stride;
581  }
582 }
583 
584 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
585  int i,j;
586  for (i=0; i < height; i++) {
587  for (j=0; j < width; j++) {
588  dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
589  }
590  src += stride;
591  dst += stride;
592  }
593 }
594 
595 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
596  int i,j;
597  for (i=0; i < height; i++) {
598  for (j=0; j < width; j++) {
599  dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
600  }
601  src += stride;
602  dst += stride;
603  }
604 }
605 
606 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
607  int i,j;
608  for (i=0; i < height; i++) {
609  for (j=0; j < width; j++) {
610  dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
611  }
612  src += stride;
613  dst += stride;
614  }
615 }
616 
617 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
618  int i,j;
619  for (i=0; i < height; i++) {
620  for (j=0; j < width; j++) {
621  dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
622  }
623  src += stride;
624  dst += stride;
625  }
626 }
627 
628 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
629  int i,j;
630  for (i=0; i < height; i++) {
631  for (j=0; j < width; j++) {
632  dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
633  }
634  src += stride;
635  dst += stride;
636  }
637 }
638 
639 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
640  switch(width){
641  case 2: avg_pixels2_8_c (dst, src, stride, height); break;
642  case 4: avg_pixels4_8_c (dst, src, stride, height); break;
643  case 8: avg_pixels8_8_c (dst, src, stride, height); break;
644  case 16:avg_pixels16_8_c(dst, src, stride, height); break;
645  }
646 }
647 
648 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
649  int i,j;
650  for (i=0; i < height; i++) {
651  for (j=0; j < width; j++) {
652  dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
653  }
654  src += stride;
655  dst += stride;
656  }
657 }
658 
659 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
660  int i,j;
661  for (i=0; i < height; i++) {
662  for (j=0; j < width; j++) {
663  dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
664  }
665  src += stride;
666  dst += stride;
667  }
668 }
669 
670 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
671  int i,j;
672  for (i=0; i < height; i++) {
673  for (j=0; j < width; j++) {
674  dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
675  }
676  src += stride;
677  dst += stride;
678  }
679 }
680 
681 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
682  int i,j;
683  for (i=0; i < height; i++) {
684  for (j=0; j < width; j++) {
685  dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
686  }
687  src += stride;
688  dst += stride;
689  }
690 }
691 
692 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
693  int i,j;
694  for (i=0; i < height; i++) {
695  for (j=0; j < width; j++) {
696  dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
697  }
698  src += stride;
699  dst += stride;
700  }
701 }
702 
703 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
704  int i,j;
705  for (i=0; i < height; i++) {
706  for (j=0; j < width; j++) {
707  dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
708  }
709  src += stride;
710  dst += stride;
711  }
712 }
713 
714 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
715  int i,j;
716  for (i=0; i < height; i++) {
717  for (j=0; j < width; j++) {
718  dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
719  }
720  src += stride;
721  dst += stride;
722  }
723 }
724 
725 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
726  int i,j;
727  for (i=0; i < height; i++) {
728  for (j=0; j < width; j++) {
729  dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
730  }
731  src += stride;
732  dst += stride;
733  }
734 }
735 
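/*
 * QPEL_MC instantiates the MPEG-4 quarter-pel motion compensation functions
 * for one output operation (put, put_no_rnd or avg; see the op_* macros
 * after the macro body).  The h/v lowpass filters use the 8-tap kernel
 * (-1, 3, -6, 20, 20, -6, 3, -1), with samples mirrored at the block edges;
 * normalization and clipping happen inside OP().  The mcXY suffix of the
 * generated functions is the quarter-pel position, X horizontal and Y
 * vertical; the full-pel mc00 case is handled separately further down.
 */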
736 #define QPEL_MC(r, OPNAME, RND, OP) \
737 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
738  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
739  int i;\
740  for(i=0; i<h; i++)\
741  {\
742  OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
743  OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
744  OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
745  OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
746  OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
747  OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
748  OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
749  OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
750  dst+=dstStride;\
751  src+=srcStride;\
752  }\
753 }\
754 \
755 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
756  const int w=8;\
757  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
758  int i;\
759  for(i=0; i<w; i++)\
760  {\
761  const int src0= src[0*srcStride];\
762  const int src1= src[1*srcStride];\
763  const int src2= src[2*srcStride];\
764  const int src3= src[3*srcStride];\
765  const int src4= src[4*srcStride];\
766  const int src5= src[5*srcStride];\
767  const int src6= src[6*srcStride];\
768  const int src7= src[7*srcStride];\
769  const int src8= src[8*srcStride];\
770  OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
771  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
772  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
773  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
774  OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
775  OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
776  OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
777  OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
778  dst++;\
779  src++;\
780  }\
781 }\
782 \
783 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
784  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
785  int i;\
786  \
787  for(i=0; i<h; i++)\
788  {\
789  OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
790  OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
791  OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
792  OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
793  OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
794  OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
795  OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
796  OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
797  OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
798  OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
799  OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
800  OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
801  OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
802  OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
803  OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
804  OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
805  dst+=dstStride;\
806  src+=srcStride;\
807  }\
808 }\
809 \
810 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
811  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
812  int i;\
813  const int w=16;\
814  for(i=0; i<w; i++)\
815  {\
816  const int src0= src[0*srcStride];\
817  const int src1= src[1*srcStride];\
818  const int src2= src[2*srcStride];\
819  const int src3= src[3*srcStride];\
820  const int src4= src[4*srcStride];\
821  const int src5= src[5*srcStride];\
822  const int src6= src[6*srcStride];\
823  const int src7= src[7*srcStride];\
824  const int src8= src[8*srcStride];\
825  const int src9= src[9*srcStride];\
826  const int src10= src[10*srcStride];\
827  const int src11= src[11*srcStride];\
828  const int src12= src[12*srcStride];\
829  const int src13= src[13*srcStride];\
830  const int src14= src[14*srcStride];\
831  const int src15= src[15*srcStride];\
832  const int src16= src[16*srcStride];\
833  OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
834  OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
835  OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
836  OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
837  OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
838  OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
839  OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
840  OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
841  OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
842  OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
843  OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
844  OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
845  OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
846  OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
847  OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
848  OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
849  dst++;\
850  src++;\
851  }\
852 }\
853 \
854 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
855 {\
856  uint8_t half[64];\
857  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
858  OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
859 }\
860 \
861 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
862 {\
863  OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
864 }\
865 \
866 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
867 {\
868  uint8_t half[64];\
869  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
870  OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
871 }\
872 \
873 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
874 {\
875  uint8_t full[16*9];\
876  uint8_t half[64];\
877  copy_block9(full, src, 16, stride, 9);\
878  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
879  OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
880 }\
881 \
882 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
883 {\
884  uint8_t full[16*9];\
885  copy_block9(full, src, 16, stride, 9);\
886  OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
887 }\
888 \
889 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
890 {\
891  uint8_t full[16*9];\
892  uint8_t half[64];\
893  copy_block9(full, src, 16, stride, 9);\
894  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
895  OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
896 }\
897 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
898 {\
899  uint8_t full[16*9];\
900  uint8_t halfH[72];\
901  uint8_t halfV[64];\
902  uint8_t halfHV[64];\
903  copy_block9(full, src, 16, stride, 9);\
904  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
905  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
906  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
907  OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
908 }\
909 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
910 {\
911  uint8_t full[16*9];\
912  uint8_t halfH[72];\
913  uint8_t halfHV[64];\
914  copy_block9(full, src, 16, stride, 9);\
915  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
916  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
917  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
918  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
919 }\
920 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
921 {\
922  uint8_t full[16*9];\
923  uint8_t halfH[72];\
924  uint8_t halfV[64];\
925  uint8_t halfHV[64];\
926  copy_block9(full, src, 16, stride, 9);\
927  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
928  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
929  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
930  OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
931 }\
932 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
933 {\
934  uint8_t full[16*9];\
935  uint8_t halfH[72];\
936  uint8_t halfHV[64];\
937  copy_block9(full, src, 16, stride, 9);\
938  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
939  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
940  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
941  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
942 }\
943 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
944 {\
945  uint8_t full[16*9];\
946  uint8_t halfH[72];\
947  uint8_t halfV[64];\
948  uint8_t halfHV[64];\
949  copy_block9(full, src, 16, stride, 9);\
950  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
951  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
952  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
953  OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
954 }\
955 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
956 {\
957  uint8_t full[16*9];\
958  uint8_t halfH[72];\
959  uint8_t halfHV[64];\
960  copy_block9(full, src, 16, stride, 9);\
961  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
962  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
963  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
964  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
965 }\
966 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
967 {\
968  uint8_t full[16*9];\
969  uint8_t halfH[72];\
970  uint8_t halfV[64];\
971  uint8_t halfHV[64];\
972  copy_block9(full, src, 16, stride, 9);\
973  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
974  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
975  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
976  OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
977 }\
978 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
979 {\
980  uint8_t full[16*9];\
981  uint8_t halfH[72];\
982  uint8_t halfHV[64];\
983  copy_block9(full, src, 16, stride, 9);\
984  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
985  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
986  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
987  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
988 }\
989 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
990 {\
991  uint8_t halfH[72];\
992  uint8_t halfHV[64];\
993  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
994  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
996 }\
997 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
998 {\
999  uint8_t halfH[72];\
1000  uint8_t halfHV[64];\
1001  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1002  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1003  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1004 }\
1005 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1006 {\
1007  uint8_t full[16*9];\
1008  uint8_t halfH[72];\
1009  uint8_t halfV[64];\
1010  uint8_t halfHV[64];\
1011  copy_block9(full, src, 16, stride, 9);\
1012  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1013  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1014  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1015  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1016 }\
1017 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1018 {\
1019  uint8_t full[16*9];\
1020  uint8_t halfH[72];\
1021  copy_block9(full, src, 16, stride, 9);\
1022  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1023  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1024  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1025 }\
1026 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1027 {\
1028  uint8_t full[16*9];\
1029  uint8_t halfH[72];\
1030  uint8_t halfV[64];\
1031  uint8_t halfHV[64];\
1032  copy_block9(full, src, 16, stride, 9);\
1033  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1034  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1035  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1036  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1037 }\
1038 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1039 {\
1040  uint8_t full[16*9];\
1041  uint8_t halfH[72];\
1042  copy_block9(full, src, 16, stride, 9);\
1043  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1044  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1045  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1046 }\
1047 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1048 {\
1049  uint8_t halfH[72];\
1050  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1051  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1052 }\
1053 \
1054 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1055 {\
1056  uint8_t half[256];\
1057  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1058  OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1059 }\
1060 \
1061 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1062 {\
1063  OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1064 }\
1065 \
1066 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1067 {\
1068  uint8_t half[256];\
1069  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1070  OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1071 }\
1072 \
1073 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1074 {\
1075  uint8_t full[24*17];\
1076  uint8_t half[256];\
1077  copy_block17(full, src, 24, stride, 17);\
1078  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1079  OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1080 }\
1081 \
1082 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1083 {\
1084  uint8_t full[24*17];\
1085  copy_block17(full, src, 24, stride, 17);\
1086  OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1087 }\
1088 \
1089 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1090 {\
1091  uint8_t full[24*17];\
1092  uint8_t half[256];\
1093  copy_block17(full, src, 24, stride, 17);\
1094  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1095  OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1096 }\
1097 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1098 {\
1099  uint8_t full[24*17];\
1100  uint8_t halfH[272];\
1101  uint8_t halfV[256];\
1102  uint8_t halfHV[256];\
1103  copy_block17(full, src, 24, stride, 17);\
1104  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1105  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1106  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1107  OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1108 }\
1109 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1110 {\
1111  uint8_t full[24*17];\
1112  uint8_t halfH[272];\
1113  uint8_t halfHV[256];\
1114  copy_block17(full, src, 24, stride, 17);\
1115  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1116  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1117  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1118  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1119 }\
1120 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1121 {\
1122  uint8_t full[24*17];\
1123  uint8_t halfH[272];\
1124  uint8_t halfV[256];\
1125  uint8_t halfHV[256];\
1126  copy_block17(full, src, 24, stride, 17);\
1127  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1128  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1129  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1130  OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1131 }\
1132 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1133 {\
1134  uint8_t full[24*17];\
1135  uint8_t halfH[272];\
1136  uint8_t halfHV[256];\
1137  copy_block17(full, src, 24, stride, 17);\
1138  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1139  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1140  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1141  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1142 }\
1143 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1144 {\
1145  uint8_t full[24*17];\
1146  uint8_t halfH[272];\
1147  uint8_t halfV[256];\
1148  uint8_t halfHV[256];\
1149  copy_block17(full, src, 24, stride, 17);\
1150  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1151  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1152  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1153  OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1154 }\
1155 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1156 {\
1157  uint8_t full[24*17];\
1158  uint8_t halfH[272];\
1159  uint8_t halfHV[256];\
1160  copy_block17(full, src, 24, stride, 17);\
1161  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1162  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1163  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1164  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1165 }\
1166 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1167 {\
1168  uint8_t full[24*17];\
1169  uint8_t halfH[272];\
1170  uint8_t halfV[256];\
1171  uint8_t halfHV[256];\
1172  copy_block17(full, src, 24, stride, 17);\
1173  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1174  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1175  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1176  OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1177 }\
1178 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1179 {\
1180  uint8_t full[24*17];\
1181  uint8_t halfH[272];\
1182  uint8_t halfHV[256];\
1183  copy_block17(full, src, 24, stride, 17);\
1184  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1185  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1186  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1187  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1188 }\
1189 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1190 {\
1191  uint8_t halfH[272];\
1192  uint8_t halfHV[256];\
1193  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1194  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1196 }\
1197 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1198 {\
1199  uint8_t halfH[272];\
1200  uint8_t halfHV[256];\
1201  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1203  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1204 }\
1205 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1206 {\
1207  uint8_t full[24*17];\
1208  uint8_t halfH[272];\
1209  uint8_t halfV[256];\
1210  uint8_t halfHV[256];\
1211  copy_block17(full, src, 24, stride, 17);\
1212  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1213  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1214  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1215  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1216 }\
1217 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1218 {\
1219  uint8_t full[24*17];\
1220  uint8_t halfH[272];\
1221  copy_block17(full, src, 24, stride, 17);\
1222  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1223  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1224  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1225 }\
1226 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1227 {\
1228  uint8_t full[24*17];\
1229  uint8_t halfH[272];\
1230  uint8_t halfV[256];\
1231  uint8_t halfHV[256];\
1232  copy_block17(full, src, 24, stride, 17);\
1233  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1234  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1235  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1236  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1237 }\
1238 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1239 {\
1240  uint8_t full[24*17];\
1241  uint8_t halfH[272];\
1242  copy_block17(full, src, 24, stride, 17);\
1243  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1244  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1245  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1246 }\
1247 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1248 {\
1249  uint8_t halfH[272];\
1250  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1251  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1252 }
1253 
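/*
 * The OP() argument b is the unnormalized filter sum (gain 32): op_put and
 * op_avg round with +16 before the >>5 and clip through the crop table,
 * while the no_rnd variants add only 15, biasing ties downwards.  QPEL_MC
 * is then expanded for the put, no-rounding put and avg cases.
 */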
1254 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1255 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1256 #define op_put(a, b) a = cm[((b) + 16)>>5]
1257 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1258 
1259 QPEL_MC(0, put_ , _ , op_put)
1260 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1261 QPEL_MC(0, avg_ , _ , op_avg)
1262 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1263 #undef op_avg
1264 #undef op_avg_no_rnd
1265 #undef op_put
1266 #undef op_put_no_rnd
1267 
1268 void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1269 {
1270  put_pixels8_8_c(dst, src, stride, 8);
1271 }
1272 void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1273 {
1274  avg_pixels8_8_c(dst, src, stride, 8);
1275 }
1276 void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1277 {
1278  put_pixels16_8_c(dst, src, stride, 16);
1279 }
1280 void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1281 {
1282  avg_pixels16_8_c(dst, src, stride, 16);
1283 }
1284 
1285 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1286 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1287 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1288 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1289 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1290 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1291 
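/*
 * WMV2 half-pel interpolation: a 4-tap (-1, 9, 9, -1)/16 filter with +8
 * rounding, applied horizontally here and vertically in
 * wmv2_mspel8_v_lowpass below; the crop table clips the result to 8 bits.
 */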
1292 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1293  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1294  int i;
1295 
1296  for(i=0; i<h; i++){
1297  dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1298  dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1299  dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1300  dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1301  dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1302  dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1303  dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1304  dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1305  dst+=dstStride;
1306  src+=srcStride;
1307  }
1308 }
1309 
1310 #if CONFIG_RV40_DECODER
1311 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1312 {
1313  put_pixels16_xy2_8_c(dst, src, stride, 16);
1314 }
1315 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1316 {
1317  avg_pixels16_xy2_8_c(dst, src, stride, 16);
1318 }
1319 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1320 {
1321  put_pixels8_xy2_8_c(dst, src, stride, 8);
1322 }
1323 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1324 {
1325  avg_pixels8_xy2_8_c(dst, src, stride, 8);
1326 }
1327 #endif /* CONFIG_RV40_DECODER */
1328 
1329 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1330  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1331  int i;
1332 
1333  for(i=0; i<w; i++){
1334  const int src_1= src[ -srcStride];
1335  const int src0 = src[0 ];
1336  const int src1 = src[ srcStride];
1337  const int src2 = src[2*srcStride];
1338  const int src3 = src[3*srcStride];
1339  const int src4 = src[4*srcStride];
1340  const int src5 = src[5*srcStride];
1341  const int src6 = src[6*srcStride];
1342  const int src7 = src[7*srcStride];
1343  const int src8 = src[8*srcStride];
1344  const int src9 = src[9*srcStride];
1345  dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1346  dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1347  dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1348  dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1349  dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1350  dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1351  dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1352  dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1353  src++;
1354  dst++;
1355  }
1356 }
1357 
1358 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1359 {
1360  uint8_t half[64];
1361  wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1362  put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1363 }
1364 
1365 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1366 {
1367  wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1368 }
1369 
1370 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1371 {
1372  uint8_t half[64];
1373  wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1374  put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1375 }
1376 
1377 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1378 {
1379  wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1380 }
1381 
1382 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1383 {
1384  uint8_t halfH[88];
1385  uint8_t halfV[64];
1386  uint8_t halfHV[64];
1387  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1388  wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1389  wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1390  put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1391 }
1392 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1393 {
1394  uint8_t halfH[88];
1395  uint8_t halfV[64];
1396  uint8_t halfHV[64];
1397  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1398  wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1399  wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1400  put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1401 }
1402 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1403 {
1404  uint8_t halfH[88];
1405  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1406  wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1407 }
1408 
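/*
 * Sum-of-absolute-differences (SAD) comparison functions for motion
 * estimation.  The _x2, _y2 and _xy2 variants compare pix1 against the
 * half-pel interpolated version of pix2 (horizontal, vertical and diagonal
 * averaging via avg2/avg4).
 */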
1409 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1410 {
1411  int s, i;
1412 
1413  s = 0;
1414  for(i=0;i<h;i++) {
1415  s += abs(pix1[0] - pix2[0]);
1416  s += abs(pix1[1] - pix2[1]);
1417  s += abs(pix1[2] - pix2[2]);
1418  s += abs(pix1[3] - pix2[3]);
1419  s += abs(pix1[4] - pix2[4]);
1420  s += abs(pix1[5] - pix2[5]);
1421  s += abs(pix1[6] - pix2[6]);
1422  s += abs(pix1[7] - pix2[7]);
1423  s += abs(pix1[8] - pix2[8]);
1424  s += abs(pix1[9] - pix2[9]);
1425  s += abs(pix1[10] - pix2[10]);
1426  s += abs(pix1[11] - pix2[11]);
1427  s += abs(pix1[12] - pix2[12]);
1428  s += abs(pix1[13] - pix2[13]);
1429  s += abs(pix1[14] - pix2[14]);
1430  s += abs(pix1[15] - pix2[15]);
1431  pix1 += line_size;
1432  pix2 += line_size;
1433  }
1434  return s;
1435 }
1436 
1437 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1438 {
1439  int s, i;
1440 
1441  s = 0;
1442  for(i=0;i<h;i++) {
1443  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1444  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1445  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1446  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1447  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1448  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1449  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1450  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1451  s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1452  s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1453  s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1454  s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1455  s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1456  s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1457  s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1458  s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1459  pix1 += line_size;
1460  pix2 += line_size;
1461  }
1462  return s;
1463 }
1464 
1465 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1466 {
1467  int s, i;
1468  uint8_t *pix3 = pix2 + line_size;
1469 
1470  s = 0;
1471  for(i=0;i<h;i++) {
1472  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1473  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1474  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1475  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1476  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1477  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1478  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1479  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1480  s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1481  s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1482  s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1483  s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1484  s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1485  s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1486  s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1487  s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1488  pix1 += line_size;
1489  pix2 += line_size;
1490  pix3 += line_size;
1491  }
1492  return s;
1493 }
1494 
1495 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1496 {
1497  int s, i;
1498  uint8_t *pix3 = pix2 + line_size;
1499 
1500  s = 0;
1501  for(i=0;i<h;i++) {
1502  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1503  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1504  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1505  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1506  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1507  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1508  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1509  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1510  s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1511  s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1512  s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1513  s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1514  s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1515  s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1516  s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1517  s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1518  pix1 += line_size;
1519  pix2 += line_size;
1520  pix3 += line_size;
1521  }
1522  return s;
1523 }
1524 
1525 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1526 {
1527  int s, i;
1528 
1529  s = 0;
1530  for(i=0;i<h;i++) {
1531  s += abs(pix1[0] - pix2[0]);
1532  s += abs(pix1[1] - pix2[1]);
1533  s += abs(pix1[2] - pix2[2]);
1534  s += abs(pix1[3] - pix2[3]);
1535  s += abs(pix1[4] - pix2[4]);
1536  s += abs(pix1[5] - pix2[5]);
1537  s += abs(pix1[6] - pix2[6]);
1538  s += abs(pix1[7] - pix2[7]);
1539  pix1 += line_size;
1540  pix2 += line_size;
1541  }
1542  return s;
1543 }
1544 
1545 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1546 {
1547  int s, i;
1548 
1549  s = 0;
1550  for(i=0;i<h;i++) {
1551  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1552  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1553  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1554  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1555  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1556  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1557  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1558  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1559  pix1 += line_size;
1560  pix2 += line_size;
1561  }
1562  return s;
1563 }
1564 
1565 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1566 {
1567  int s, i;
1568  uint8_t *pix3 = pix2 + line_size;
1569 
1570  s = 0;
1571  for(i=0;i<h;i++) {
1572  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1573  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1574  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1575  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1576  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1577  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1578  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1579  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1580  pix1 += line_size;
1581  pix2 += line_size;
1582  pix3 += line_size;
1583  }
1584  return s;
1585 }
1586 
1587 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1588 {
1589  int s, i;
1590  uint8_t *pix3 = pix2 + line_size;
1591 
1592  s = 0;
1593  for(i=0;i<h;i++) {
1594  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1595  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1596  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1597  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1598  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1599  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1600  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1601  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1602  pix1 += line_size;
1603  pix2 += line_size;
1604  pix3 += line_size;
1605  }
1606  return s;
1607 }
1608 
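/* The pix_abs16_*_c / pix_abs8_*_c functions above compute the sum of absolute
 * differences (SAD) between a 16- or 8-pixel-wide block and a reference block
 * at integer (no suffix), horizontal half-pel (_x2), vertical half-pel (_y2)
 * and diagonal half-pel (_xy2) positions; avg2() and avg4(), defined earlier in
 * this file, are the rounding averages used for the half-pel interpolation. */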
1609 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1610  MpegEncContext *c = v;
1611  int score1=0;
1612  int score2=0;
1613  int x,y;
1614 
1615  for(y=0; y<h; y++){
1616  for(x=0; x<16; x++){
1617  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1618  }
1619  if(y+1<h){
1620  for(x=0; x<15; x++){
1621  score2+= FFABS( s1[x ] - s1[x +stride]
1622  - s1[x+1] + s1[x+1+stride])
1623  -FFABS( s2[x ] - s2[x +stride]
1624  - s2[x+1] + s2[x+1+stride]);
1625  }
1626  }
1627  s1+= stride;
1628  s2+= stride;
1629  }
1630 
1631  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1632  else return score1 + FFABS(score2)*8;
1633 }
1634 
1635 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1636  MpegEncContext *c = v;
1637  int score1=0;
1638  int score2=0;
1639  int x,y;
1640 
1641  for(y=0; y<h; y++){
1642  for(x=0; x<8; x++){
1643  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1644  }
1645  if(y+1<h){
1646  for(x=0; x<7; x++){
1647  score2+= FFABS( s1[x ] - s1[x +stride]
1648  - s1[x+1] + s1[x+1+stride])
1649  -FFABS( s2[x ] - s2[x +stride]
1650  - s2[x+1] + s2[x+1+stride]);
1651  }
1652  }
1653  s1+= stride;
1654  s2+= stride;
1655  }
1656 
1657  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1658  else return score1 + FFABS(score2)*8;
1659 }
1660 
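/* nsse16_c/nsse8_c implement a "noise preserving" SSE: score1 is the plain sum
 * of squared errors, score2 accumulates the difference between the 2x2 gradient
 * magnitudes of the two blocks, and the final score adds |score2| weighted by
 * avctx->nsse_weight (or 8 when no context pointer is passed). */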
1661 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1662  int i;
1663  unsigned int sum=0;
1664 
1665  for(i=0; i<8*8; i++){
1666  int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1667  int w= weight[i];
1668  b>>= RECON_SHIFT;
1669  assert(-512<b && b<512);
1670 
1671  sum += (w*b)*(w*b)>>4;
1672  }
1673  return sum>>2;
1674 }
1675 
1676 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1677  int i;
1678 
1679  for(i=0; i<8*8; i++){
1680  rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1681  }
1682 }
1683 
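/* try_8x8basis_c estimates the weighted squared error that would result from
 * adding 'basis' scaled by 'scale' to the residual 'rem', while add_8x8basis_c
 * actually applies that update; the BASIS_SHIFT/RECON_SHIFT arithmetic matches
 * the fixed-point layout used by the encoder's noise-shaping quantization
 * refinement (presumably the refinement loop in mpegvideo_enc). */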
1684 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
1685  return 0;
1686 }
1687 
1688 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1689  int i;
1690 
1691  memset(cmp, 0, sizeof(void*)*6);
1692 
1693  for(i=0; i<6; i++){
1694  switch(type&0xFF){
1695  case FF_CMP_SAD:
1696  cmp[i]= c->sad[i];
1697  break;
1698  case FF_CMP_SATD:
1699  cmp[i]= c->hadamard8_diff[i];
1700  break;
1701  case FF_CMP_SSE:
1702  cmp[i]= c->sse[i];
1703  break;
1704  case FF_CMP_DCT:
1705  cmp[i]= c->dct_sad[i];
1706  break;
1707  case FF_CMP_DCT264:
1708  cmp[i]= c->dct264_sad[i];
1709  break;
1710  case FF_CMP_DCTMAX:
1711  cmp[i]= c->dct_max[i];
1712  break;
1713  case FF_CMP_PSNR:
1714  cmp[i]= c->quant_psnr[i];
1715  break;
1716  case FF_CMP_BIT:
1717  cmp[i]= c->bit[i];
1718  break;
1719  case FF_CMP_RD:
1720  cmp[i]= c->rd[i];
1721  break;
1722  case FF_CMP_VSAD:
1723  cmp[i]= c->vsad[i];
1724  break;
1725  case FF_CMP_VSSE:
1726  cmp[i]= c->vsse[i];
1727  break;
1728  case FF_CMP_ZERO:
1729  cmp[i]= zero_cmp;
1730  break;
1731  case FF_CMP_NSSE:
1732  cmp[i]= c->nsse[i];
1733  break;
1734  default:
1735  av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1736  }
1737  }
1738 }
1739 
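/* ff_set_cmp() fills a 6-entry table of comparison functions from the FF_CMP_*
 * id stored in the low byte of 'type'; entry i is taken from the matching
 * per-size table in DSPContext (c->sad[i], c->sse[i], ...). A hypothetical
 * caller looks roughly like
 *     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
 * (sketch only; the me_cmp field names are assumed, not taken from this file). */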
1740 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1741  long i;
1742  for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1743  long a = *(long*)(src+i);
1744  long b = *(long*)(dst+i);
1745  *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1746  }
1747  for(; i<w; i++)
1748  dst[i+0] += src[i+0];
1749 }
1750 
1751 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1752  long i;
1753 #if !HAVE_FAST_UNALIGNED
1754  if((long)src2 & (sizeof(long)-1)){
1755  for(i=0; i+7<w; i+=8){
1756  dst[i+0] = src1[i+0]-src2[i+0];
1757  dst[i+1] = src1[i+1]-src2[i+1];
1758  dst[i+2] = src1[i+2]-src2[i+2];
1759  dst[i+3] = src1[i+3]-src2[i+3];
1760  dst[i+4] = src1[i+4]-src2[i+4];
1761  dst[i+5] = src1[i+5]-src2[i+5];
1762  dst[i+6] = src1[i+6]-src2[i+6];
1763  dst[i+7] = src1[i+7]-src2[i+7];
1764  }
1765  }else
1766 #endif
1767  for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1768  long a = *(long*)(src1+i);
1769  long b = *(long*)(src2+i);
1770  *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1771  }
1772  for(; i<w; i++)
1773  dst[i+0] = src1[i+0]-src2[i+0];
1774 }
1775 
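/* add_bytes_c and diff_bytes_c above process sizeof(long) bytes per iteration
 * using a SWAR trick: masking with pb_7f adds or subtracts the low 7 bits of
 * every byte without carries crossing byte boundaries, and the XOR with
 * (a^b)&pb_80 (or a^b^pb_80 for the subtraction) restores the correct most
 * significant bit of each byte. The scalar tail loop handles the remaining
 * w % sizeof(long) bytes, and diff_bytes_c falls back to a byte loop for a
 * misaligned src2 when fast unaligned loads are not available. */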
1776 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1777  int i;
1778  uint8_t l, lt;
1779 
1780  l= *left;
1781  lt= *left_top;
1782 
1783  for(i=0; i<w; i++){
1784  l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1785  lt= src1[i];
1786  dst[i]= l;
1787  }
1788 
1789  *left= l;
1790  *left_top= lt;
1791 }
1792 
1793 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1794  int i;
1795  uint8_t l, lt;
1796 
1797  l= *left;
1798  lt= *left_top;
1799 
1800  for(i=0; i<w; i++){
1801  const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1802  lt= src1[i];
1803  l= src2[i];
1804  dst[i]= l - pred;
1805  }
1806 
1807  *left= l;
1808  *left_top= lt;
1809 }
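/* add/sub_hfyu_median_prediction_c implement HuffYUV's median predictor: each
 * pixel is predicted as the median of the left neighbour, the top neighbour
 * (src1[i]) and left + top - top_left computed modulo 256. The "add" variant
 * reconstructs pixels from the transmitted differences, the "sub" variant
 * produces the differences for encoding, and both return the running left and
 * top-left state through *left and *left_top. */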
1810 
1811 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1812  int i;
1813 
1814  for(i=0; i<w-1; i++){
1815  acc+= src[i];
1816  dst[i]= acc;
1817  i++;
1818  acc+= src[i];
1819  dst[i]= acc;
1820  }
1821 
1822  for(; i<w; i++){
1823  acc+= src[i];
1824  dst[i]= acc;
1825  }
1826 
1827  return acc;
1828 }
1829 
1830 #if HAVE_BIGENDIAN
1831 #define B 3
1832 #define G 2
1833 #define R 1
1834 #define A 0
1835 #else
1836 #define B 0
1837 #define G 1
1838 #define R 2
1839 #define A 3
1840 #endif
1841 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
1842  int i;
1843  int r,g,b,a;
1844  r= *red;
1845  g= *green;
1846  b= *blue;
1847  a= *alpha;
1848 
1849  for(i=0; i<w; i++){
1850  b+= src[4*i+B];
1851  g+= src[4*i+G];
1852  r+= src[4*i+R];
1853  a+= src[4*i+A];
1854 
1855  dst[4*i+B]= b;
1856  dst[4*i+G]= g;
1857  dst[4*i+R]= r;
1858  dst[4*i+A]= a;
1859  }
1860 
1861  *red= r;
1862  *green= g;
1863  *blue= b;
1864  *alpha= a;
1865 }
1866 #undef B
1867 #undef G
1868 #undef R
1869 #undef A
1870 
1871 #define BUTTERFLY2(o1,o2,i1,i2) \
1872 o1= (i1)+(i2);\
1873 o2= (i1)-(i2);
1874 
1875 #define BUTTERFLY1(x,y) \
1876 {\
1877  int a,b;\
1878  a= x;\
1879  b= y;\
1880  x= a+b;\
1881  y= a-b;\
1882 }
1883 
1884 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1885 
1886 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1887  int i;
1888  int temp[64];
1889  int sum=0;
1890 
1891  assert(h==8);
1892 
1893  for(i=0; i<8; i++){
1894  //FIXME try pointer walks
1895  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1896  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1897  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1898  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1899 
1900  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1901  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1902  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1903  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1904 
1905  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1906  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1907  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1908  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1909  }
1910 
1911  for(i=0; i<8; i++){
1912  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1913  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1914  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1915  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1916 
1917  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1918  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1919  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1920  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1921 
1922  sum +=
1923  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1924  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1925  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1926  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1927  }
1928  return sum;
1929 }
1930 
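/* hadamard8_diff8x8_c computes the SATD of the 8x8 difference between src and
 * dst: three levels of row butterflies followed by three levels of column
 * butterflies form an (unnormalized) 8x8 Hadamard transform, and the sum of
 * absolute transformed coefficients is returned. hadamard8_intra8x8_c below
 * applies the same transform to the source block alone and subtracts the DC
 * term ("-mean") so that the block mean does not dominate the score. */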
1931 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
1932  int i;
1933  int temp[64];
1934  int sum=0;
1935 
1936  assert(h==8);
1937 
1938  for(i=0; i<8; i++){
1939  //FIXME try pointer walks
1940  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
1941  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
1942  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
1943  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
1944 
1945  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1946  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1947  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1948  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1949 
1950  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1951  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1952  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1953  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1954  }
1955 
1956  for(i=0; i<8; i++){
1957  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1958  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1959  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1960  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1961 
1962  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1963  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1964  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1965  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1966 
1967  sum +=
1968  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1969  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1970  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1971  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1972  }
1973 
1974  sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
1975 
1976  return sum;
1977 }
1978 
1979 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
1980  MpegEncContext * const s= (MpegEncContext *)c;
1981  LOCAL_ALIGNED_16(int16_t, temp, [64]);
1982 
1983  assert(h==8);
1984 
1985  s->dsp.diff_pixels(temp, src1, src2, stride);
1986  s->dsp.fdct(temp);
1987  return s->dsp.sum_abs_dctelem(temp);
1988 }
1989 
1990 #if CONFIG_GPL
1991 #define DCT8_1D {\
1992  const int s07 = SRC(0) + SRC(7);\
1993  const int s16 = SRC(1) + SRC(6);\
1994  const int s25 = SRC(2) + SRC(5);\
1995  const int s34 = SRC(3) + SRC(4);\
1996  const int a0 = s07 + s34;\
1997  const int a1 = s16 + s25;\
1998  const int a2 = s07 - s34;\
1999  const int a3 = s16 - s25;\
2000  const int d07 = SRC(0) - SRC(7);\
2001  const int d16 = SRC(1) - SRC(6);\
2002  const int d25 = SRC(2) - SRC(5);\
2003  const int d34 = SRC(3) - SRC(4);\
2004  const int a4 = d16 + d25 + (d07 + (d07>>1));\
2005  const int a5 = d07 - d34 - (d25 + (d25>>1));\
2006  const int a6 = d07 + d34 - (d16 + (d16>>1));\
2007  const int a7 = d16 - d25 + (d34 + (d34>>1));\
2008  DST(0, a0 + a1 ) ;\
2009  DST(1, a4 + (a7>>2)) ;\
2010  DST(2, a2 + (a3>>1)) ;\
2011  DST(3, a5 + (a6>>2)) ;\
2012  DST(4, a0 - a1 ) ;\
2013  DST(5, a6 - (a5>>2)) ;\
2014  DST(6, (a2>>1) - a3 ) ;\
2015  DST(7, (a4>>2) - a7 ) ;\
2016 }
2017 
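/* DCT8_1D is a 1-D 8-point integer transform built only from adds, subtracts
 * and shifts; its butterfly structure appears to match the 8x8 forward
 * transform of H.264 High profile (hence the dct264_sad name and the
 * CONFIG_GPL guard). dct264_sad8x8_c below applies it first to the rows and
 * then to the columns of the pixel difference and sums the absolute values of
 * the resulting coefficients. */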
2018 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2019  MpegEncContext * const s= (MpegEncContext *)c;
2020  int16_t dct[8][8];
2021  int i;
2022  int sum=0;
2023 
2024  s->dsp.diff_pixels(dct[0], src1, src2, stride);
2025 
2026 #define SRC(x) dct[i][x]
2027 #define DST(x,v) dct[i][x]= v
2028  for( i = 0; i < 8; i++ )
2029  DCT8_1D
2030 #undef SRC
2031 #undef DST
2032 
2033 #define SRC(x) dct[x][i]
2034 #define DST(x,v) sum += FFABS(v)
2035  for( i = 0; i < 8; i++ )
2036  DCT8_1D
2037 #undef SRC
2038 #undef DST
2039  return sum;
2040 }
2041 #endif
2042 
2043 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2044  MpegEncContext * const s= (MpegEncContext *)c;
2045  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2046  int sum=0, i;
2047 
2048  assert(h==8);
2049 
2050  s->dsp.diff_pixels(temp, src1, src2, stride);
2051  s->dsp.fdct(temp);
2052 
2053  for(i=0; i<64; i++)
2054  sum= FFMAX(sum, FFABS(temp[i]));
2055 
2056  return sum;
2057 }
2058 
2059 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2060  MpegEncContext * const s= (MpegEncContext *)c;
2061  LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2062  int16_t * const bak = temp+64;
2063  int sum=0, i;
2064 
2065  assert(h==8);
2066  s->mb_intra=0;
2067 
2068  s->dsp.diff_pixels(temp, src1, src2, stride);
2069 
2070  memcpy(bak, temp, 64*sizeof(int16_t));
2071 
2072  s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2073  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2074  ff_simple_idct_8(temp); //FIXME
2075 
2076  for(i=0; i<64; i++)
2077  sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2078 
2079  return sum;
2080 }
2081 
2082 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2083  MpegEncContext * const s= (MpegEncContext *)c;
2084  const uint8_t *scantable= s->intra_scantable.permutated;
2085  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2086  LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2087  LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2088  int i, last, run, bits, level, distortion, start_i;
2089  const int esc_length= s->ac_esc_length;
2090  uint8_t * length;
2091  uint8_t * last_length;
2092 
2093  assert(h==8);
2094 
2095  copy_block8(lsrc1, src1, 8, stride, 8);
2096  copy_block8(lsrc2, src2, 8, stride, 8);
2097 
2098  s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2099 
2100  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2101 
2102  bits=0;
2103 
2104  if (s->mb_intra) {
2105  start_i = 1;
2106  length = s->intra_ac_vlc_length;
2107  last_length= s->intra_ac_vlc_last_length;
2108  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2109  } else {
2110  start_i = 0;
2111  length = s->inter_ac_vlc_length;
2112  last_length= s->inter_ac_vlc_last_length;
2113  }
2114 
2115  if(last>=start_i){
2116  run=0;
2117  for(i=start_i; i<last; i++){
2118  int j= scantable[i];
2119  level= temp[j];
2120 
2121  if(level){
2122  level+=64;
2123  if((level&(~127)) == 0){
2124  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2125  }else
2126  bits+= esc_length;
2127  run=0;
2128  }else
2129  run++;
2130  }
2131  i= scantable[last];
2132 
2133  level= temp[i] + 64;
2134 
2135  assert(level - 64);
2136 
2137  if((level&(~127)) == 0){
2138  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2139  }else
2140  bits+= esc_length;
2141 
2142  }
2143 
2144  if(last>=0){
2145  if(s->mb_intra)
2146  s->dct_unquantize_intra(s, temp, 0, s->qscale);
2147  else
2148  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2149  }
2150 
2151  s->dsp.idct_add(lsrc2, 8, temp);
2152 
2153  distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2154 
2155  return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2156 }
2157 
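/* rd8x8_c returns an approximate rate-distortion cost for coding the 8x8
 * difference block: the residual is transformed and quantized, the bit cost is
 * estimated from the intra/inter AC VLC length tables (esc_length for
 * out-of-range levels), the block is dequantized and added back, and the SSE
 * against the original is used as distortion. The final value is
 * distortion + bits*qscale*qscale*109/128, i.e. a lambda of roughly
 * 0.85*qscale^2. bit8x8_c below performs the same bit-counting walk without
 * the reconstruction and distortion part. */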
2158 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2159  MpegEncContext * const s= (MpegEncContext *)c;
2160  const uint8_t *scantable= s->intra_scantable.permutated;
2161  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2162  int i, last, run, bits, level, start_i;
2163  const int esc_length= s->ac_esc_length;
2164  uint8_t * length;
2165  uint8_t * last_length;
2166 
2167  assert(h==8);
2168 
2169  s->dsp.diff_pixels(temp, src1, src2, stride);
2170 
2171  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2172 
2173  bits=0;
2174 
2175  if (s->mb_intra) {
2176  start_i = 1;
2177  length = s->intra_ac_vlc_length;
2178  last_length= s->intra_ac_vlc_last_length;
2179  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2180  } else {
2181  start_i = 0;
2182  length = s->inter_ac_vlc_length;
2183  last_length= s->inter_ac_vlc_last_length;
2184  }
2185 
2186  if(last>=start_i){
2187  run=0;
2188  for(i=start_i; i<last; i++){
2189  int j= scantable[i];
2190  level= temp[j];
2191 
2192  if(level){
2193  level+=64;
2194  if((level&(~127)) == 0){
2195  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2196  }else
2197  bits+= esc_length;
2198  run=0;
2199  }else
2200  run++;
2201  }
2202  i= scantable[last];
2203 
2204  level= temp[i] + 64;
2205 
2206  assert(level - 64);
2207 
2208  if((level&(~127)) == 0){
2209  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2210  }else
2211  bits+= esc_length;
2212  }
2213 
2214  return bits;
2215 }
2216 
2217 #define VSAD_INTRA(size) \
2218 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2219  int score=0; \
2220  int x,y; \
2221  \
2222  for(y=1; y<h; y++){ \
2223  for(x=0; x<size; x+=4){ \
2224  score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2225  +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2226  } \
2227  s+= stride; \
2228  } \
2229  \
2230  return score; \
2231 }
2232 VSAD_INTRA(8)
2233 VSAD_INTRA(16)
2234 
2235 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2236  int score=0;
2237  int x,y;
2238 
2239  for(y=1; y<h; y++){
2240  for(x=0; x<16; x++){
2241  score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2242  }
2243  s1+= stride;
2244  s2+= stride;
2245  }
2246 
2247  return score;
2248 }
2249 
2250 #define SQ(a) ((a)*(a))
2251 #define VSSE_INTRA(size) \
2252 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2253  int score=0; \
2254  int x,y; \
2255  \
2256  for(y=1; y<h; y++){ \
2257  for(x=0; x<size; x+=4){ \
2258  score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2259  +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2260  } \
2261  s+= stride; \
2262  } \
2263  \
2264  return score; \
2265 }
2266 VSSE_INTRA(8)
2267 VSSE_INTRA(16)
2268 
2269 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2270  int score=0;
2271  int x,y;
2272 
2273  for(y=1; y<h; y++){
2274  for(x=0; x<16; x++){
2275  score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2276  }
2277  s1+= stride;
2278  s2+= stride;
2279  }
2280 
2281  return score;
2282 }
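/* The vsad and vsse metrics above measure vertical activity: each pixel is
 * compared with the pixel directly below it (absolute difference for vsad,
 * squared difference for vsse), either within a single block (the _intra
 * variants) or on the difference of two blocks (vsad16_c/vsse16_c). They are
 * selected via FF_CMP_VSAD/FF_CMP_VSSE in ff_set_cmp(), e.g. for
 * interlaced-vs-progressive DCT decisions. */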
2283 
2284 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2285  int size){
2286  int score=0;
2287  int i;
2288  for(i=0; i<size; i++)
2289  score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2290  return score;
2291 }
2292 
2293 #define WRAPPER8_16_SQ(name8, name16)\
2294 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2295  int score=0;\
2296  score +=name8(s, dst , src , stride, 8);\
2297  score +=name8(s, dst+8 , src+8 , stride, 8);\
2298  if(h==16){\
2299  dst += 8*stride;\
2300  src += 8*stride;\
2301  score +=name8(s, dst , src , stride, 8);\
2302  score +=name8(s, dst+8 , src+8 , stride, 8);\
2303  }\
2304  return score;\
2305 }
2306 
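/* WRAPPER8_16_SQ builds a 16x16 (or 16x8) comparison function from an 8x8 one:
 * it scores the left and right 8x8 halves of the top eight rows and, when
 * h==16, advances by eight rows and scores the bottom two halves as well,
 * summing the four partial results. */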
2307 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2308 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2309 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2310 #if CONFIG_GPL
2311 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2312 #endif
2313 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2314 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2315 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2316 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2317 
2318 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2319  uint32_t maxi, uint32_t maxisign)
2320 {
2321 
2322  if(a > mini) return mini;
2323  else if((a^(1U<<31)) > maxisign) return maxi;
2324  else return a;
2325 }
2326 
2327 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2328  int i;
2329  uint32_t mini = *(uint32_t*)min;
2330  uint32_t maxi = *(uint32_t*)max;
2331  uint32_t maxisign = maxi ^ (1U<<31);
2332  uint32_t *dsti = (uint32_t*)dst;
2333  const uint32_t *srci = (const uint32_t*)src;
2334  for(i=0; i<len; i+=8) {
2335  dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2336  dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2337  dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2338  dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2339  dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2340  dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2341  dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2342  dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2343  }
2344 }
2345 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2346  int i;
2347  if(min < 0 && max > 0) {
2348  vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2349  } else {
2350  for(i=0; i < len; i+=8) {
2351  dst[i ] = av_clipf(src[i ], min, max);
2352  dst[i + 1] = av_clipf(src[i + 1], min, max);
2353  dst[i + 2] = av_clipf(src[i + 2], min, max);
2354  dst[i + 3] = av_clipf(src[i + 3], min, max);
2355  dst[i + 4] = av_clipf(src[i + 4], min, max);
2356  dst[i + 5] = av_clipf(src[i + 5], min, max);
2357  dst[i + 6] = av_clipf(src[i + 6], min, max);
2358  dst[i + 7] = av_clipf(src[i + 7], min, max);
2359  }
2360  }
2361 }
2362 
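/* vector_clipf_c clips floats to [min, max], eight elements per iteration.
 * When min < 0 < max it switches to vector_clipf_c_opposite_sign, which works
 * on the raw IEEE-754 bit patterns: non-negative floats compare correctly as
 * unsigned integers, and flipping the sign bit (a ^ (1U<<31)) lets the same
 * unsigned compare handle the positive bound, so no floating-point compares
 * are needed. Both paths assume len is a multiple of 8. */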
2363 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
2364 {
2365  int res = 0;
2366 
2367  while (order--)
2368  res += *v1++ * *v2++;
2369 
2370  return res;
2371 }
2372 
2373 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2374 {
2375  int res = 0;
2376  while (order--) {
2377  res += *v1 * *v2++;
2378  *v1++ += mul * *v3++;
2379  }
2380  return res;
2381 }
2382 
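/* scalarproduct_and_madd_int16_c fuses two passes: it returns the dot product
 * of v1 and v2 while simultaneously updating v1[i] += mul * v3[i], saving one
 * traversal compared to the plain scalarproduct_int16_c above. */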
2383 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2384  int32_t max, unsigned int len)
2385 {
2386  do {
2387  *dst++ = av_clip(*src++, min, max);
2388  *dst++ = av_clip(*src++, min, max);
2389  *dst++ = av_clip(*src++, min, max);
2390  *dst++ = av_clip(*src++, min, max);
2391  *dst++ = av_clip(*src++, min, max);
2392  *dst++ = av_clip(*src++, min, max);
2393  *dst++ = av_clip(*src++, min, max);
2394  *dst++ = av_clip(*src++, min, max);
2395  len -= 8;
2396  } while (len > 0);
2397 }
2398 
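/* vector_clip_int32_c clips eight int32 values per iteration with av_clip();
 * like the float version, it assumes len is a positive multiple of 8. */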
2399 static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
2400 {
2401  ff_j_rev_dct (block);
2402  put_pixels_clamped_c(block, dest, line_size);
2403 }
2404 static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
2405 {
2406  ff_j_rev_dct (block);
2407  add_pixels_clamped_c(block, dest, line_size);
2408 }
2409 
2410 /* init static data */
2411 av_cold void ff_dsputil_static_init(void)
2412 {
2413  int i;
2414 
2415  for(i=0;i<512;i++) {
2416  ff_squareTbl[i] = (i - 256) * (i - 256);
2417  }
2418 }
2419 
2420 int ff_check_alignment(void){
2421  static int did_fail=0;
2422  LOCAL_ALIGNED_16(int, aligned, [4]);
2423 
2424  if((intptr_t)aligned & 15){
2425  if(!did_fail){
2426 #if HAVE_MMX || HAVE_ALTIVEC
2427  av_log(NULL, AV_LOG_ERROR,
2428  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2429  "and may be very slow or crash. This is not a bug in libavcodec,\n"
2430  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2431  "Do not report crashes to Libav developers.\n");
2432 #endif
2433  did_fail=1;
2434  }
2435  return -1;
2436  }
2437  return 0;
2438 }
2439 
2440 av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
2441 {
2442  ff_check_alignment();
2443 
2444 #if CONFIG_ENCODERS
2445  if (avctx->bits_per_raw_sample == 10) {
2446  c->fdct = ff_jpeg_fdct_islow_10;
2447  c->fdct248 = ff_fdct248_islow_10;
2448  } else {
2449  if(avctx->dct_algo==FF_DCT_FASTINT) {
2450  c->fdct = ff_fdct_ifast;
2451  c->fdct248 = ff_fdct_ifast248;
2452  }
2453  else if(avctx->dct_algo==FF_DCT_FAAN) {
2454  c->fdct = ff_faandct;
2455  c->fdct248 = ff_faandct248;
2456  }
2457  else {
2458  c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2459  c->fdct248 = ff_fdct248_islow_8;
2460  }
2461  }
2462 #endif //CONFIG_ENCODERS
2463 
2464  if (avctx->bits_per_raw_sample == 10) {
2465  c->idct_put = ff_simple_idct_put_10;
2466  c->idct_add = ff_simple_idct_add_10;
2467  c->idct = ff_simple_idct_10;
2468  c->idct_permutation_type = FF_NO_IDCT_PERM;
2469  } else {
2470  if(avctx->idct_algo==FF_IDCT_INT){
2471  c->idct_put= jref_idct_put;
2472  c->idct_add= jref_idct_add;
2473  c->idct = ff_j_rev_dct;
2474  c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2475  }else if(avctx->idct_algo==FF_IDCT_FAAN){
2476  c->idct_put= ff_faanidct_put;
2477  c->idct_add= ff_faanidct_add;
2478  c->idct = ff_faanidct;
2479  c->idct_permutation_type= FF_NO_IDCT_PERM;
2480  }else{ //accurate/default
2481  c->idct_put = ff_simple_idct_put_8;
2482  c->idct_add = ff_simple_idct_add_8;
2483  c->idct = ff_simple_idct_8;
2484  c->idct_permutation_type = FF_NO_IDCT_PERM;
2485  }
2486  }
2487 
2488  c->diff_pixels = diff_pixels_c;
2489  c->put_pixels_clamped = put_pixels_clamped_c;
2490  c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2491  c->add_pixels_clamped = add_pixels_clamped_c;
2492  c->sum_abs_dctelem = sum_abs_dctelem_c;
2493  c->gmc1 = gmc1_c;
2494  c->gmc = ff_gmc_c;
2495  c->pix_sum = pix_sum_c;
2496  c->pix_norm1 = pix_norm1_c;
2497 
2498  c->fill_block_tab[0] = fill_block16_c;
2499  c->fill_block_tab[1] = fill_block8_c;
2500 
2501  /* TODO [0] 16 [1] 8 */
2502  c->pix_abs[0][0] = pix_abs16_c;
2503  c->pix_abs[0][1] = pix_abs16_x2_c;
2504  c->pix_abs[0][2] = pix_abs16_y2_c;
2505  c->pix_abs[0][3] = pix_abs16_xy2_c;
2506  c->pix_abs[1][0] = pix_abs8_c;
2507  c->pix_abs[1][1] = pix_abs8_x2_c;
2508  c->pix_abs[1][2] = pix_abs8_y2_c;
2509  c->pix_abs[1][3] = pix_abs8_xy2_c;
2510 
2511  c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2512  c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2513  c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2514  c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2515  c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2516  c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2517  c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2518  c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2519  c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2520 
2521  c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2522  c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2523  c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2524  c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2525  c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2526  c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2527  c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2528  c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2529  c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2530 
2531 #define dspfunc(PFX, IDX, NUM) \
2532  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2533  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2534  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2535  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2536  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2537  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2538  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2539  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2540  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2541  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2542  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2543  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2544  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2545  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2546  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2547  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2548 
2549  dspfunc(put_qpel, 0, 16);
2550  dspfunc(put_no_rnd_qpel, 0, 16);
2551 
2552  dspfunc(avg_qpel, 0, 16);
2553  /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2554 
2555  dspfunc(put_qpel, 1, 8);
2556  dspfunc(put_no_rnd_qpel, 1, 8);
2557 
2558  dspfunc(avg_qpel, 1, 8);
2559  /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2560 
2561 #undef dspfunc
2562 
2563  c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2564  c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2565  c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2566  c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2567  c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2568  c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2569  c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2570  c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2571 
2572 #define SET_CMP_FUNC(name) \
2573  c->name[0]= name ## 16_c;\
2574  c->name[1]= name ## 8x8_c;
2575 
2576  SET_CMP_FUNC(hadamard8_diff)
2577  c->hadamard8_diff[4]= hadamard8_intra16_c;
2578  c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2579  SET_CMP_FUNC(dct_sad)
2580  SET_CMP_FUNC(dct_max)
2581 #if CONFIG_GPL
2582  SET_CMP_FUNC(dct264_sad)
2583 #endif
2584  c->sad[0]= pix_abs16_c;
2585  c->sad[1]= pix_abs8_c;
2586  c->sse[0]= sse16_c;
2587  c->sse[1]= sse8_c;
2588  c->sse[2]= sse4_c;
2589  SET_CMP_FUNC(quant_psnr)
2590  SET_CMP_FUNC(rd)
2591  SET_CMP_FUNC(bit)
2592  c->vsad[0]= vsad16_c;
2593  c->vsad[4]= vsad_intra16_c;
2594  c->vsad[5]= vsad_intra8_c;
2595  c->vsse[0]= vsse16_c;
2596  c->vsse[4]= vsse_intra16_c;
2597  c->vsse[5]= vsse_intra8_c;
2598  c->nsse[0]= nsse16_c;
2599  c->nsse[1]= nsse8_c;
2600 
2601  c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2602 
2603  c->add_bytes= add_bytes_c;
2604  c->diff_bytes= diff_bytes_c;
2605  c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2606  c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2607  c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2608  c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2609  c->bswap_buf= bswap_buf;
2610  c->bswap16_buf = bswap16_buf;
2611 
2612  c->try_8x8basis= try_8x8basis_c;
2613  c->add_8x8basis= add_8x8basis_c;
2614 
2615  c->scalarproduct_int16 = scalarproduct_int16_c;
2616  c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2617  c->vector_clip_int32 = vector_clip_int32_c;
2618  c->vector_clipf = vector_clipf_c;
2619 
2620  c->shrink[0]= av_image_copy_plane;
2621  c->shrink[1]= ff_shrink22;
2622  c->shrink[2]= ff_shrink44;
2623  c->shrink[3]= ff_shrink88;
2624 
2625  c->add_pixels8 = add_pixels8_c;
2626 
2627 #undef FUNC
2628 #undef FUNCC
2629 #define FUNC(f, depth) f ## _ ## depth
2630 #define FUNCC(f, depth) f ## _ ## depth ## _c
2631 
2632  c->draw_edges = FUNCC(draw_edges, 8);
2633  c->clear_block = FUNCC(clear_block, 8);
2634  c->clear_blocks = FUNCC(clear_blocks, 8);
2635 
2636 #define BIT_DEPTH_FUNCS(depth) \
2637  c->get_pixels = FUNCC(get_pixels, depth);
2638 
2639  switch (avctx->bits_per_raw_sample) {
2640  case 9:
2641  case 10:
2642  BIT_DEPTH_FUNCS(16);
2643  break;
2644  default:
2645  BIT_DEPTH_FUNCS(8);
2646  break;
2647  }
2648 
2649 
2650  if (ARCH_ARM)
2651  ff_dsputil_init_arm(c, avctx);
2652  if (ARCH_BFIN)
2653  ff_dsputil_init_bfin(c, avctx);
2654  if (ARCH_PPC)
2655  ff_dsputil_init_ppc(c, avctx);
2656  if (ARCH_SH4)
2657  ff_dsputil_init_sh4(c, avctx);
2658  if (HAVE_VIS)
2659  ff_dsputil_init_vis(c, avctx);
2660  if (ARCH_X86)
2661  ff_dsputil_init_x86(c, avctx);
2662 
2663  ff_init_scantable_permutation(c->idct_permutation,
2664  c->idct_permutation_type);
2665 }