Libav
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of Libav.
7  *
8  * Libav is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * Libav is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with Libav; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 
28 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int size,
29  GetBitContext *gb, int pcm_bit_depth)
30 {
31  int x, y;
32  pixel *dst = (pixel *)_dst;
33 
34  stride /= sizeof(pixel);
35 
36  for (y = 0; y < size; y++) {
37  for (x = 0; x < size; x++)
38  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
39  dst += stride;
40  }
41 }
42 
43 static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs,
44  ptrdiff_t stride)
45 {
46  int x, y;
47  pixel *dst = (pixel *)_dst;
48 
49  stride /= sizeof(pixel);
50 
51  for (y = 0; y < 4; y++) {
52  for (x = 0; x < 4; x++) {
53  dst[x] = av_clip_pixel(dst[x] + *coeffs);
54  coeffs++;
55  }
56  dst += stride;
57  }
58 }
59 
60 static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs,
61  ptrdiff_t stride)
62 {
63  int x, y;
64  pixel *dst = (pixel *)_dst;
65 
66  stride /= sizeof(pixel);
67 
68  for (y = 0; y < 8; y++) {
69  for (x = 0; x < 8; x++) {
70  dst[x] = av_clip_pixel(dst[x] + *coeffs);
71  coeffs++;
72  }
73  dst += stride;
74  }
75 }
76 
77 static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs,
78  ptrdiff_t stride)
79 {
80  int x, y;
81  pixel *dst = (pixel *)_dst;
82 
83  stride /= sizeof(pixel);
84 
85  for (y = 0; y < 16; y++) {
86  for (x = 0; x < 16; x++) {
87  dst[x] = av_clip_pixel(dst[x] + *coeffs);
88  coeffs++;
89  }
90  dst += stride;
91  }
92 }
93 
94 static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs,
95  ptrdiff_t stride)
96 {
97  int x, y;
98  pixel *dst = (pixel *)_dst;
99 
100  stride /= sizeof(pixel);
101 
102  for (y = 0; y < 32; y++) {
103  for (x = 0; x < 32; x++) {
104  dst[x] = av_clip_pixel(dst[x] + *coeffs);
105  coeffs++;
106  }
107  dst += stride;
108  }
109 }
110 
111 static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs,
112  ptrdiff_t stride)
113 {
114  pixel *dst = (pixel *)_dst;
115  int shift = 13 - BIT_DEPTH;
116 #if BIT_DEPTH <= 13
117  int offset = 1 << (shift - 1);
118 #else
119  int offset = 0;
120 #endif
121  int x, y;
122 
123  stride /= sizeof(pixel);
124 
125  for (y = 0; y < 4 * 4; y += 4) {
126  for (x = 0; x < 4; x++)
127  dst[x] = av_clip_pixel(dst[x] + ((coeffs[y + x] + offset) >> shift));
128  dst += stride;
129  }
130 }
131 
/*
 * Assignment policies handed to the TR_* macros through their "assign"
 * argument.  SCALE and ADD_AND_SCALE read the variables "add" and "shift"
 * from the scope in which they are expanded.
 */

/* Store the value unchanged. */
#define SET(out, val) (out) = (val)

/* Store the rounded, right-shifted value after av_clip_int16(). */
#define SCALE(out, val) (out) = av_clip_int16(((val) + add) >> shift)

/* Add the rounded, right-shifted, av_clip_int16()-limited value to the
 * current content of the target and clip the sum with av_clip_pixel(). */
#define ADD_AND_SCALE(out, val)                                              \
    (out) = av_clip_pixel((out) + av_clip_int16(((val) + add) >> shift))
136 
/*
 * One 4-point pass of the transform used by transform_4x4_luma_add().
 *
 * Reads src[0], src[step], src[2 * step] and src[3 * step], then hands the
 * four results to "assign" for dst[0] .. dst[3 * step].  All inputs are
 * loaded before anything is written, so dst and src may be the same array.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                                  \
    do {                                                                     \
        const int in0 = (src)[0 * (step)];                                   \
        const int in1 = (src)[1 * (step)];                                   \
        const int in2 = (src)[2 * (step)];                                   \
        const int in3 = (src)[3 * (step)];                                   \
        const int sum02   = in0 + in2;                                       \
        const int sum23   = in2 + in3;                                       \
        const int diff03  = in0 - in3;                                       \
        const int scaled1 = 74 * in1;                                        \
                                                                             \
        assign((dst)[0 * (step)], 29 * sum02  + 55 * sum23  + scaled1);      \
        assign((dst)[1 * (step)], 55 * diff03 - 29 * sum23  + scaled1);      \
        assign((dst)[2 * (step)], 74 * (in0 - in2 + in3));                   \
        assign((dst)[3 * (step)], 55 * sum02  + 29 * diff03 - scaled1);      \
    } while (0)
151 
152 static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
153  ptrdiff_t stride)
154 {
155  int i;
156  pixel *dst = (pixel *)_dst;
157  int shift = 7;
158  int add = 1 << (shift - 1);
159  int16_t *src = coeffs;
160 
161  stride /= sizeof(pixel);
162 
163  for (i = 0; i < 4; i++) {
164  TR_4x4_LUMA(src, src, 4, SCALE);
165  src++;
166  }
167 
168  shift = 20 - BIT_DEPTH;
169  add = 1 << (shift - 1);
170  for (i = 0; i < 4; i++) {
171  TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
172  coeffs += 4;
173  dst += stride;
174  }
175 }
176 
177 #undef TR_4x4_LUMA
178 
/*
 * 4-point transform pass built from rows 0, 8, 16 and 24 of the "transform"
 * table.
 *
 * Inputs are src[0], src[sstep], src[2 * sstep] and src[3 * sstep]; the four
 * results are handed to "assign" for dst[0] .. dst[3 * dstep].  All inputs
 * are loaded before anything is written, so dst and src may be the same
 * array.
 */
#define TR_4(dst, src, dstep, sstep, assign)                                 \
    do {                                                                     \
        const int in0 = (src)[0 * (sstep)];                                  \
        const int in1 = (src)[1 * (sstep)];                                  \
        const int in2 = (src)[2 * (sstep)];                                  \
        const int in3 = (src)[3 * (sstep)];                                  \
        const int even0 = transform[8 * 0][0] * in0 +                        \
                          transform[8 * 2][0] * in2;                         \
        const int even1 = transform[8 * 0][1] * in0 +                        \
                          transform[8 * 2][1] * in2;                         \
        const int odd0  = transform[8 * 1][0] * in1 +                        \
                          transform[8 * 3][0] * in3;                         \
        const int odd1  = transform[8 * 1][1] * in1 +                        \
                          transform[8 * 3][1] * in3;                         \
                                                                             \
        assign((dst)[0 * (dstep)], even0 + odd0);                            \
        assign((dst)[1 * (dstep)], even1 + odd1);                            \
        assign((dst)[2 * (dstep)], even1 - odd1);                            \
        assign((dst)[3 * (dstep)], even0 - odd0);                            \
    } while (0)
195 
196 static void FUNC(transform_4x4_add)(uint8_t *_dst, int16_t *coeffs,
197  ptrdiff_t stride)
198 {
199  int i;
200  pixel *dst = (pixel *)_dst;
201  int shift = 7;
202  int add = 1 << (shift - 1);
203  int16_t *src = coeffs;
204 
205  stride /= sizeof(pixel);
206 
207  for (i = 0; i < 4; i++) {
208  TR_4(src, src, 4, 4, SCALE);
209  src++;
210  }
211 
212  shift = 20 - BIT_DEPTH;
213  add = 1 << (shift - 1);
214  for (i = 0; i < 4; i++) {
215  TR_4(dst, coeffs, 1, 1, ADD_AND_SCALE);
216  coeffs += 4;
217  dst += stride;
218  }
219 }
220 
/*
 * 8-point transform pass.
 *
 * The odd-indexed inputs are combined with every 4th row of the "transform"
 * table; the even-indexed inputs go through TR_4 with a doubled source
 * step.  Output k receives even + odd, output 7 - k receives even - odd.
 * Everything is computed into local arrays before "assign" is invoked, so
 * dst and src may be the same array.
 */
#define TR_8(dst, src, dstep, sstep, assign)                                 \
    do {                                                                     \
        int k, tap;                                                          \
        int even8[4];                                                        \
        int odd8[4] = { 0 };                                                 \
        for (k = 0; k < 4; k++)                                              \
            for (tap = 1; tap < 8; tap += 2)                                 \
                odd8[k] += transform[4 * tap][k] * (src)[tap * (sstep)];     \
        TR_4(even8, src, 1, 2 * (sstep), SET);                               \
                                                                             \
        for (k = 0; k < 4; k++) {                                            \
            assign((dst)[k * (dstep)],       even8[k] + odd8[k]);            \
            assign((dst)[(7 - k) * (dstep)], even8[k] - odd8[k]);            \
        }                                                                    \
    } while (0)
236 
/*
 * 16-point transform pass.
 *
 * The odd-indexed inputs are combined with every 2nd row of the "transform"
 * table; the even-indexed inputs go through TR_8 with a doubled source
 * step.  Output k receives even + odd, output 15 - k receives even - odd.
 * Everything is computed into local arrays before "assign" is invoked, so
 * dst and src may be the same array.
 */
#define TR_16(dst, src, dstep, sstep, assign)                                \
    do {                                                                     \
        int k, tap;                                                          \
        int even16[8];                                                       \
        int odd16[8] = { 0 };                                                \
        for (k = 0; k < 8; k++)                                              \
            for (tap = 1; tap < 16; tap += 2)                                \
                odd16[k] += transform[2 * tap][k] * (src)[tap * (sstep)];    \
        TR_8(even16, src, 1, 2 * (sstep), SET);                              \
                                                                             \
        for (k = 0; k < 8; k++) {                                            \
            assign((dst)[k * (dstep)],        even16[k] + odd16[k]);         \
            assign((dst)[(15 - k) * (dstep)], even16[k] - odd16[k]);         \
        }                                                                    \
    } while (0)
252 
/*
 * 32-point transform pass.
 *
 * The odd-indexed inputs are combined with the matching rows of the
 * "transform" table; the even-indexed inputs go through TR_16 with a
 * doubled source step.  Output k receives even + odd, output 31 - k
 * receives even - odd.  Everything is computed into local arrays before
 * "assign" is invoked, so dst and src may be the same array.
 */
#define TR_32(dst, src, dstep, sstep, assign)                                \
    do {                                                                     \
        int k, tap;                                                          \
        int even32[16];                                                      \
        int odd32[16] = { 0 };                                               \
        for (k = 0; k < 16; k++)                                             \
            for (tap = 1; tap < 32; tap += 2)                                \
                odd32[k] += transform[tap][k] * (src)[tap * (sstep)];        \
        TR_16(even32, src, 1, 2 * (sstep), SET);                             \
                                                                             \
        for (k = 0; k < 16; k++) {                                           \
            assign((dst)[k * (dstep)],        even32[k] + odd32[k]);         \
            assign((dst)[(31 - k) * (dstep)], even32[k] - odd32[k]);         \
        }                                                                    \
    } while (0)
268 
269 
270 
271 static void FUNC(transform_8x8_add)(uint8_t *_dst, int16_t *coeffs,
272  ptrdiff_t stride)
273 {
274  int i;
275  pixel *dst = (pixel *)_dst;
276  int shift = 7;
277  int add = 1 << (shift - 1);
278  int16_t *src = coeffs;
279 
280  stride /= sizeof(pixel);
281 
282  for (i = 0; i < 8; i++) {
283  TR_8(src, src, 8, 8, SCALE);
284  src++;
285  }
286 
287  shift = 20 - BIT_DEPTH;
288  add = 1 << (shift - 1);
289  for (i = 0; i < 8; i++) {
290  TR_8(dst, coeffs, 1, 1, ADD_AND_SCALE);
291  coeffs += 8;
292  dst += stride;
293  }
294 }
295 
296 static void FUNC(transform_16x16_add)(uint8_t *_dst, int16_t *coeffs,
297  ptrdiff_t stride)
298 {
299  int i;
300  pixel *dst = (pixel *)_dst;
301  int shift = 7;
302  int add = 1 << (shift - 1);
303  int16_t *src = coeffs;
304 
305  stride /= sizeof(pixel);
306 
307  for (i = 0; i < 16; i++) {
308  TR_16(src, src, 16, 16, SCALE);
309  src++;
310  }
311 
312  shift = 20 - BIT_DEPTH;
313  add = 1 << (shift - 1);
314  for (i = 0; i < 16; i++) {
315  TR_16(dst, coeffs, 1, 1, ADD_AND_SCALE);
316  coeffs += 16;
317  dst += stride;
318  }
319 }
320 
321 static void FUNC(transform_32x32_add)(uint8_t *_dst, int16_t *coeffs,
322  ptrdiff_t stride)
323 {
324  int i;
325  pixel *dst = (pixel *)_dst;
326  int shift = 7;
327  int add = 1 << (shift - 1);
328  int16_t *src = coeffs;
329 
330  stride /= sizeof(pixel);
331 
332  for (i = 0; i < 32; i++) {
333  TR_32(src, src, 32, 32, SCALE);
334  src++;
335  }
336  src = coeffs;
337  shift = 20 - BIT_DEPTH;
338  add = 1 << (shift - 1);
339  for (i = 0; i < 32; i++) {
340  TR_32(dst, coeffs, 1, 1, ADD_AND_SCALE);
341  coeffs += 32;
342  dst += stride;
343  }
344 }
345 
346 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
347  ptrdiff_t stride, SAOParams *sao,
348  int *borders, int width, int height,
349  int c_idx, int class)
350 {
351  pixel *dst = (pixel *)_dst;
352  pixel *src = (pixel *)_src;
353  int offset_table[32] = { 0 };
354  int k, y, x;
355  int chroma = !!c_idx;
356  int shift = BIT_DEPTH - 5;
357  int *sao_offset_val = sao->offset_val[c_idx];
358  int sao_left_class = sao->band_position[c_idx];
359  int init_y = 0, init_x = 0;
360 
361  stride /= sizeof(pixel);
362 
363  switch (class) {
364  case 0:
365  if (!borders[2])
366  width -= (8 >> chroma) + 2;
367  if (!borders[3])
368  height -= (4 >> chroma) + 2;
369  break;
370  case 1:
371  init_y = -(4 >> chroma) - 2;
372  if (!borders[2])
373  width -= (8 >> chroma) + 2;
374  height = (4 >> chroma) + 2;
375  break;
376  case 2:
377  init_x = -(8 >> chroma) - 2;
378  width = (8 >> chroma) + 2;
379  if (!borders[3])
380  height -= (4 >> chroma) + 2;
381  break;
382  case 3:
383  init_y = -(4 >> chroma) - 2;
384  init_x = -(8 >> chroma) - 2;
385  width = (8 >> chroma) + 2;
386  height = (4 >> chroma) + 2;
387  break;
388  }
389 
390  dst = dst + (init_y * stride + init_x);
391  src = src + (init_y * stride + init_x);
392  for (k = 0; k < 4; k++)
393  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
394  for (y = 0; y < height; y++) {
395  for (x = 0; x < width; x++)
396  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
397  dst += stride;
398  src += stride;
399  }
400 }
401 
402 static void FUNC(sao_band_filter_0)(uint8_t *dst, uint8_t *src,
403  ptrdiff_t stride, SAOParams *sao,
404  int *borders, int width, int height,
405  int c_idx)
406 {
407  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
408  width, height, c_idx, 0);
409 }
410 
411 static void FUNC(sao_band_filter_1)(uint8_t *dst, uint8_t *src,
412  ptrdiff_t stride, SAOParams *sao,
413  int *borders, int width, int height,
414  int c_idx)
415 {
416  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
417  width, height, c_idx, 1);
418 }
419 
420 static void FUNC(sao_band_filter_2)(uint8_t *dst, uint8_t *src,
421  ptrdiff_t stride, SAOParams *sao,
422  int *borders, int width, int height,
423  int c_idx)
424 {
425  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
426  width, height, c_idx, 2);
427 }
428 
429 static void FUNC(sao_band_filter_3)(uint8_t *_dst, uint8_t *_src,
430  ptrdiff_t stride, SAOParams *sao,
431  int *borders, int width, int height,
432  int c_idx)
433 {
434  FUNC(sao_band_filter)(_dst, _src, stride, sao, borders,
435  width, height, c_idx, 3);
436 }
437 
438 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
439  ptrdiff_t stride, SAOParams *sao,
440  int *borders, int _width, int _height,
441  int c_idx, uint8_t vert_edge,
442  uint8_t horiz_edge, uint8_t diag_edge)
443 {
444  int x, y;
445  pixel *dst = (pixel *)_dst;
446  pixel *src = (pixel *)_src;
447  int chroma = !!c_idx;
448  int *sao_offset_val = sao->offset_val[c_idx];
449  int sao_eo_class = sao->eo_class[c_idx];
450  int init_x = 0, init_y = 0, width = _width, height = _height;
451 
452  static const int8_t pos[4][2][2] = {
453  { { -1, 0 }, { 1, 0 } }, // horizontal
454  { { 0, -1 }, { 0, 1 } }, // vertical
455  { { -1, -1 }, { 1, 1 } }, // 45 degree
456  { { 1, -1 }, { -1, 1 } }, // 135 degree
457  };
458  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
459 
460 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
461 
462  stride /= sizeof(pixel);
463 
464  if (!borders[2])
465  width -= (8 >> chroma) + 2;
466  if (!borders[3])
467  height -= (4 >> chroma) + 2;
468 
469  dst = dst + (init_y * stride + init_x);
470  src = src + (init_y * stride + init_x);
471  init_y = init_x = 0;
472  if (sao_eo_class != SAO_EO_VERT) {
473  if (borders[0]) {
474  int offset_val = sao_offset_val[0];
475  int y_stride = 0;
476  for (y = 0; y < height; y++) {
477  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
478  y_stride += stride;
479  }
480  init_x = 1;
481  }
482  if (borders[2]) {
483  int offset_val = sao_offset_val[0];
484  int x_stride = width - 1;
485  for (x = 0; x < height; x++) {
486  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
487  x_stride += stride;
488  }
489  width--;
490  }
491  }
492  if (sao_eo_class != SAO_EO_HORIZ) {
493  if (borders[1]) {
494  int offset_val = sao_offset_val[0];
495  for (x = init_x; x < width; x++)
496  dst[x] = av_clip_pixel(src[x] + offset_val);
497  init_y = 1;
498  }
499  if (borders[3]) {
500  int offset_val = sao_offset_val[0];
501  int y_stride = stride * (height - 1);
502  for (x = init_x; x < width; x++)
503  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
504  height--;
505  }
506  }
507  {
508  int y_stride = init_y * stride;
509  int pos_0_0 = pos[sao_eo_class][0][0];
510  int pos_0_1 = pos[sao_eo_class][0][1];
511  int pos_1_0 = pos[sao_eo_class][1][0];
512  int pos_1_1 = pos[sao_eo_class][1][1];
513 
514  int y_stride_0_1 = (init_y + pos_0_1) * stride;
515  int y_stride_1_1 = (init_y + pos_1_1) * stride;
516  for (y = init_y; y < height; y++) {
517  for (x = init_x; x < width; x++) {
518  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
519  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
520  int offset_val = edge_idx[2 + diff0 + diff1];
521  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
522  }
523  y_stride += stride;
524  y_stride_0_1 += stride;
525  y_stride_1_1 += stride;
526  }
527  }
528 
529  {
530  // Restore pixels that can't be modified
531  int save_upper_left = !diag_edge && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
532  if (vert_edge && sao_eo_class != SAO_EO_VERT)
533  for (y = init_y+save_upper_left; y< height; y++)
534  dst[y*stride] = src[y*stride];
535  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
536  for(x = init_x+save_upper_left; x<width; x++)
537  dst[x] = src[x];
538  if(diag_edge && sao_eo_class == SAO_EO_135D)
539  dst[0] = src[0];
540  }
541 
542 #undef CMP
543 }
544 
545 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
546  ptrdiff_t stride, SAOParams *sao,
547  int *borders, int _width, int _height,
548  int c_idx, uint8_t vert_edge,
549  uint8_t horiz_edge, uint8_t diag_edge)
550 {
551  int x, y;
552  pixel *dst = (pixel *)_dst;
553  pixel *src = (pixel *)_src;
554  int chroma = !!c_idx;
555  int *sao_offset_val = sao->offset_val[c_idx];
556  int sao_eo_class = sao->eo_class[c_idx];
557  int init_x = 0, init_y = 0, width = _width, height = _height;
558 
559  static const int8_t pos[4][2][2] = {
560  { { -1, 0 }, { 1, 0 } }, // horizontal
561  { { 0, -1 }, { 0, 1 } }, // vertical
562  { { -1, -1 }, { 1, 1 } }, // 45 degree
563  { { 1, -1 }, { -1, 1 } }, // 135 degree
564  };
565  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
566 
567 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
568 
569  stride /= sizeof(pixel);
570 
571  init_y = -(4 >> chroma) - 2;
572  if (!borders[2])
573  width -= (8 >> chroma) + 2;
574  height = (4 >> chroma) + 2;
575 
576  dst = dst + (init_y * stride + init_x);
577  src = src + (init_y * stride + init_x);
578  init_y = init_x = 0;
579  if (sao_eo_class != SAO_EO_VERT) {
580  if (borders[0]) {
581  int offset_val = sao_offset_val[0];
582  int y_stride = 0;
583  for (y = 0; y < height; y++) {
584  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
585  y_stride += stride;
586  }
587  init_x = 1;
588  }
589  if (borders[2]) {
590  int offset_val = sao_offset_val[0];
591  int x_stride = width - 1;
592  for (x = 0; x < height; x++) {
593  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
594  x_stride += stride;
595  }
596  width--;
597  }
598  }
599  {
600  int y_stride = init_y * stride;
601  int pos_0_0 = pos[sao_eo_class][0][0];
602  int pos_0_1 = pos[sao_eo_class][0][1];
603  int pos_1_0 = pos[sao_eo_class][1][0];
604  int pos_1_1 = pos[sao_eo_class][1][1];
605 
606  int y_stride_0_1 = (init_y + pos_0_1) * stride;
607  int y_stride_1_1 = (init_y + pos_1_1) * stride;
608  for (y = init_y; y < height; y++) {
609  for (x = init_x; x < width; x++) {
610  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
611  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
612  int offset_val = edge_idx[2 + diff0 + diff1];
613  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
614  }
615  y_stride += stride;
616  y_stride_0_1 += stride;
617  y_stride_1_1 += stride;
618  }
619  }
620 
621  {
622  // Restore pixels that can't be modified
623  int save_lower_left = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[0];
624  if(vert_edge && sao_eo_class != SAO_EO_VERT)
625  for(y = init_y; y< height-save_lower_left; y++)
626  dst[y*stride] = src[y*stride];
627  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
628  for(x = init_x+save_lower_left; x<width; x++)
629  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
630  if(diag_edge && sao_eo_class == SAO_EO_45D)
631  dst[stride*(height-1)] = src[stride*(height-1)];
632  }
633 
634 #undef CMP
635 }
636 
637 static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src,
638  ptrdiff_t stride, SAOParams *sao,
639  int *borders, int _width, int _height,
640  int c_idx, uint8_t vert_edge,
641  uint8_t horiz_edge, uint8_t diag_edge)
642 {
643  int x, y;
644  pixel *dst = (pixel *)_dst;
645  pixel *src = (pixel *)_src;
646  int chroma = !!c_idx;
647  int *sao_offset_val = sao->offset_val[c_idx];
648  int sao_eo_class = sao->eo_class[c_idx];
649  int init_x = 0, init_y = 0, width = _width, height = _height;
650 
651  static const int8_t pos[4][2][2] = {
652  { { -1, 0 }, { 1, 0 } }, // horizontal
653  { { 0, -1 }, { 0, 1 } }, // vertical
654  { { -1, -1 }, { 1, 1 } }, // 45 degree
655  { { 1, -1 }, { -1, 1 } }, // 135 degree
656  };
657  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
658 
659 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
660 
661  stride /= sizeof(pixel);
662 
663  init_x = -(8 >> chroma) - 2;
664  width = (8 >> chroma) + 2;
665  if (!borders[3])
666  height -= (4 >> chroma) + 2;
667 
668  dst = dst + (init_y * stride + init_x);
669  src = src + (init_y * stride + init_x);
670  init_y = init_x = 0;
671  if (sao_eo_class != SAO_EO_HORIZ) {
672  if (borders[1]) {
673  int offset_val = sao_offset_val[0];
674  for (x = init_x; x < width; x++)
675  dst[x] = av_clip_pixel(src[x] + offset_val);
676  init_y = 1;
677  }
678  if (borders[3]) {
679  int offset_val = sao_offset_val[0];
680  int y_stride = stride * (height - 1);
681  for (x = init_x; x < width; x++)
682  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
683  height--;
684  }
685  }
686  {
687  int y_stride = init_y * stride;
688  int pos_0_0 = pos[sao_eo_class][0][0];
689  int pos_0_1 = pos[sao_eo_class][0][1];
690  int pos_1_0 = pos[sao_eo_class][1][0];
691  int pos_1_1 = pos[sao_eo_class][1][1];
692 
693  int y_stride_0_1 = (init_y + pos_0_1) * stride;
694  int y_stride_1_1 = (init_y + pos_1_1) * stride;
695  for (y = init_y; y < height; y++) {
696  for (x = init_x; x < width; x++) {
697  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
698  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
699  int offset_val = edge_idx[2 + diff0 + diff1];
700  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
701  }
702  y_stride += stride;
703  y_stride_0_1 += stride;
704  y_stride_1_1 += stride;
705  }
706  }
707 
708  {
709  // Restore pixels that can't be modified
710  int save_upper_right = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[1];
711  if(vert_edge && sao_eo_class != SAO_EO_VERT)
712  for(y = init_y+save_upper_right; y< height; y++)
713  dst[y*stride+width-1] = src[y*stride+width-1];
714  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
715  for(x = init_x; x<width-save_upper_right; x++)
716  dst[x] = src[x];
717  if(diag_edge && sao_eo_class == SAO_EO_45D)
718  dst[width-1] = src[width-1];
719  }
720 #undef CMP
721 }
722 
723 static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
724  ptrdiff_t stride, SAOParams *sao,
725  int *borders, int _width, int _height,
726  int c_idx, uint8_t vert_edge,
727  uint8_t horiz_edge, uint8_t diag_edge)
728 {
729  int x, y;
730  pixel *dst = (pixel *)_dst;
731  pixel *src = (pixel *)_src;
732  int chroma = !!c_idx;
733  int *sao_offset_val = sao->offset_val[c_idx];
734  int sao_eo_class = sao->eo_class[c_idx];
735  int init_x = 0, init_y = 0, width = _width, height = _height;
736 
737  static const int8_t pos[4][2][2] = {
738  { { -1, 0 }, { 1, 0 } }, // horizontal
739  { { 0, -1 }, { 0, 1 } }, // vertical
740  { { -1, -1 }, { 1, 1 } }, // 45 degree
741  { { 1, -1 }, { -1, 1 } }, // 135 degree
742  };
743  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
744 
745 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
746 
747  stride /= sizeof(pixel);
748 
749  init_y = -(4 >> chroma) - 2;
750  init_x = -(8 >> chroma) - 2;
751  width = (8 >> chroma) + 2;
752  height = (4 >> chroma) + 2;
753 
754 
755  dst = dst + (init_y * stride + init_x);
756  src = src + (init_y * stride + init_x);
757  init_y = init_x = 0;
758 
759  {
760  int y_stride = init_y * stride;
761  int pos_0_0 = pos[sao_eo_class][0][0];
762  int pos_0_1 = pos[sao_eo_class][0][1];
763  int pos_1_0 = pos[sao_eo_class][1][0];
764  int pos_1_1 = pos[sao_eo_class][1][1];
765 
766  int y_stride_0_1 = (init_y + pos_0_1) * stride;
767  int y_stride_1_1 = (init_y + pos_1_1) * stride;
768 
769  for (y = init_y; y < height; y++) {
770  for (x = init_x; x < width; x++) {
771  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
772  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
773  int offset_val = edge_idx[2 + diff0 + diff1];
774  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
775  }
776  y_stride += stride;
777  y_stride_0_1 += stride;
778  y_stride_1_1 += stride;
779  }
780  }
781 
782  {
783  // Restore pixels that can't be modified
784  int save_lower_right = !diag_edge && sao_eo_class == SAO_EO_135D;
785  if(vert_edge && sao_eo_class != SAO_EO_VERT)
786  for(y = init_y; y< height-save_lower_right; y++)
787  dst[y*stride+width-1] = src[y*stride+width-1];
788  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
789  for(x = init_x; x<width-save_lower_right; x++)
790  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
791  if(diag_edge && sao_eo_class == SAO_EO_135D)
792  dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
793  }
794 #undef CMP
795 }
796 
797 #undef SET
798 #undef SCALE
799 #undef ADD_AND_SCALE
800 #undef TR_4
801 #undef TR_8
802 #undef TR_16
803 #undef TR_32
804 
805 static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
806  uint8_t *_src, ptrdiff_t _srcstride,
807  int width, int height, int16_t* mcbuffer)
808 {
809  int x, y;
810  pixel *src = (pixel *)_src;
811  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
812 
813  for (y = 0; y < height; y++) {
814  for (x = 0; x < width; x++)
815  dst[x] = src[x] << (14 - BIT_DEPTH);
816  src += srcstride;
817  dst += dststride;
818  }
819 }
820 
/*
 * 7-tap filter with weights -1, 4, -10, 58, 17, -5, 1 applied to the samples
 * at offsets -3 .. +3 (in units of "stride") around src[x].  The variable
 * "x" is taken from the scope in which the macro is expanded.
 */
#define QPEL_FILTER_1(src, stride)                                           \
    (     - (src)[x - 3 * (stride)]                                          \
     +  4 * (src)[x - 2 * (stride)]                                          \
     - 10 * (src)[x -     (stride)]                                          \
     + 58 * (src)[x]                                                         \
     + 17 * (src)[x +     (stride)]                                          \
     -  5 * (src)[x + 2 * (stride)]                                          \
     +      (src)[x + 3 * (stride)])
829 
/*
 * 8-tap filter with weights -1, 4, -11, 40, 40, -11, 4, -1 applied to the
 * samples at offsets -3 .. +4 (in units of "stride") around src[x].  The
 * variable "x" is taken from the scope in which the macro is expanded.
 */
#define QPEL_FILTER_2(src, stride)                                           \
    (     - (src)[x - 3 * (stride)]                                          \
     +  4 * (src)[x - 2 * (stride)]                                          \
     - 11 * (src)[x -     (stride)]                                          \
     + 40 * (src)[x]                                                         \
     + 40 * (src)[x +     (stride)]                                          \
     - 11 * (src)[x + 2 * (stride)]                                          \
     +  4 * (src)[x + 3 * (stride)]                                          \
     -      (src)[x + 4 * (stride)])
839 
/*
 * 7-tap filter with weights 1, -5, 17, 58, -10, 4, -1 applied to the samples
 * at offsets -2 .. +4 (in units of "stride") around src[x].  The variable
 * "x" is taken from the scope in which the macro is expanded.
 */
#define QPEL_FILTER_3(src, stride)                                           \
    (       (src)[x - 2 * (stride)]                                          \
     -  5 * (src)[x -     (stride)]                                          \
     + 17 * (src)[x]                                                         \
     + 58 * (src)[x +     (stride)]                                          \
     - 10 * (src)[x + 2 * (stride)]                                          \
     +  4 * (src)[x + 3 * (stride)]                                          \
     -      (src)[x + 4 * (stride)])
848 
849 
/*
 * Define put_hevc_qpel_h<H>(): apply QPEL_FILTER_<H> along each row
 * (sample step 1) and store the result, right-shifted by (BIT_DEPTH - 8),
 * in the 16-bit intermediate buffer.  The mcbuffer argument of the
 * generated function is unused.  The loop variable must be called "x"
 * because the filter macros refer to it by name.
 */
#define PUT_HEVC_QPEL_H(H)                                                   \
static void FUNC(put_hevc_qpel_h ## H)(int16_t *dst, ptrdiff_t dststride,    \
                                       uint8_t *_src, ptrdiff_t _srcstride,  \
                                       int width, int height,                \
                                       int16_t* mcbuffer)                    \
{                                                                            \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                  \
    pixel *line               = (pixel *)_src;                               \
    int rows_left, x;                                                        \
                                                                             \
    for (rows_left = height; rows_left > 0; rows_left--) {                   \
        for (x = 0; x < width; x++)                                          \
            dst[x] = QPEL_FILTER_ ## H(line, 1) >> (BIT_DEPTH - 8);          \
        line += srcstride;                                                   \
        dst  += dststride;                                                   \
    }                                                                        \
}
867 
/*
 * Define put_hevc_qpel_v<V>(): apply QPEL_FILTER_<V> along each column
 * (sample step srcstride) and store the result, right-shifted by
 * (BIT_DEPTH - 8), in the 16-bit intermediate buffer.  The mcbuffer
 * argument of the generated function is unused.  The loop variable must be
 * called "x" because the filter macros refer to it by name.
 */
#define PUT_HEVC_QPEL_V(V)                                                   \
static void FUNC(put_hevc_qpel_v ## V)(int16_t *dst, ptrdiff_t dststride,    \
                                       uint8_t *_src, ptrdiff_t _srcstride,  \
                                       int width, int height,                \
                                       int16_t* mcbuffer)                    \
{                                                                            \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                  \
    pixel *line               = (pixel *)_src;                               \
    int rows_left, x;                                                        \
                                                                             \
    for (rows_left = height; rows_left > 0; rows_left--) {                   \
        for (x = 0; x < width; x++)                                          \
            dst[x] = QPEL_FILTER_ ## V(line, srcstride) >> (BIT_DEPTH - 8);  \
        line += srcstride;                                                   \
        dst  += dststride;                                                   \
    }                                                                        \
}
885 
/*
 * Define put_hevc_qpel_h<H>v<V>(): two-pass interpolation.
 *
 * Pass 1 starts ff_hevc_qpel_extra_before[V] rows above the block and runs
 * QPEL_FILTER_<H> along height + ff_hevc_qpel_extra[V] rows, writing the
 * result (right-shifted by BIT_DEPTH - 8) into a local buffer whose rows
 * are MAX_PB_SIZE elements apart.  Pass 2 runs QPEL_FILTER_<V> down the
 * columns of that buffer and stores the result, right-shifted by 6, in dst.
 * The mcbuffer argument of the generated function is unused.  The loop
 * variable must be called "x" because the filter macros refer to it by name.
 */
#define PUT_HEVC_QPEL_HV(H, V)                                               \
static void FUNC(put_hevc_qpel_h ## H ## v ## V)(int16_t *dst,               \
                                                 ptrdiff_t dststride,        \
                                                 uint8_t *_src,              \
                                                 ptrdiff_t _srcstride,       \
                                                 int width, int height,      \
                                                 int16_t* mcbuffer)          \
{                                                                            \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                  \
    pixel *line               = (pixel *)_src;                               \
    int16_t tmp_array[(MAX_PB_SIZE + 7) * MAX_PB_SIZE];                      \
    int16_t *tmp_line         = tmp_array;                                   \
    int row, x;                                                              \
                                                                             \
    line -= ff_hevc_qpel_extra_before[V] * srcstride;                        \
                                                                             \
    for (row = 0; row < height + ff_hevc_qpel_extra[V]; row++) {             \
        for (x = 0; x < width; x++)                                          \
            tmp_line[x] = QPEL_FILTER_ ## H(line, 1) >> (BIT_DEPTH - 8);     \
        line     += srcstride;                                               \
        tmp_line += MAX_PB_SIZE;                                             \
    }                                                                        \
                                                                             \
    tmp_line = tmp_array + ff_hevc_qpel_extra_before[V] * MAX_PB_SIZE;       \
                                                                             \
    for (row = 0; row < height; row++) {                                     \
        for (x = 0; x < width; x++)                                          \
            dst[x] = QPEL_FILTER_ ## V(tmp_line, MAX_PB_SIZE) >> 6;          \
        tmp_line += MAX_PB_SIZE;                                             \
        dst      += dststride;                                               \
    }                                                                        \
}
919 
926 PUT_HEVC_QPEL_HV(1, 1)
927 PUT_HEVC_QPEL_HV(1, 2)
928 PUT_HEVC_QPEL_HV(1, 3)
929 PUT_HEVC_QPEL_HV(2, 1)
930 PUT_HEVC_QPEL_HV(2, 2)
931 PUT_HEVC_QPEL_HV(2, 3)
932 PUT_HEVC_QPEL_HV(3, 1)
933 PUT_HEVC_QPEL_HV(3, 2)
934 PUT_HEVC_QPEL_HV(3, 3)
935 
936 static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
937  uint8_t *_src, ptrdiff_t _srcstride,
938  int width, int height, int mx, int my,
939  int16_t* mcbuffer)
940 {
941  int x, y;
942  pixel *src = (pixel *)_src;
943  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
944 
945  for (y = 0; y < height; y++) {
946  for (x = 0; x < width; x++)
947  dst[x] = src[x] << (14 - BIT_DEPTH);
948  src += srcstride;
949  dst += dststride;
950  }
951 }
952 
/*
 * 4-tap filter applied to the samples at offsets -1 .. +2 (in units of
 * "stride") around src[x].  The weights filter_0 .. filter_3 and the index
 * "x" are taken from the scope in which the macro is expanded.
 */
#define EPEL_FILTER(src, stride)                                             \
    (filter_0 * (src)[x -     (stride)] +                                    \
     filter_1 * (src)[x]                +                                    \
     filter_2 * (src)[x +     (stride)] +                                    \
     filter_3 * (src)[x + 2 * (stride)])
958 
959 static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
960  uint8_t *_src, ptrdiff_t _srcstride,
961  int width, int height, int mx, int my,
962  int16_t* mcbuffer)
963 {
964  int x, y;
965  pixel *src = (pixel *)_src;
966  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
967  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
968  int8_t filter_0 = filter[0];
969  int8_t filter_1 = filter[1];
970  int8_t filter_2 = filter[2];
971  int8_t filter_3 = filter[3];
972  for (y = 0; y < height; y++) {
973  for (x = 0; x < width; x++)
974  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
975  src += srcstride;
976  dst += dststride;
977  }
978 }
979 
980 static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
981  uint8_t *_src, ptrdiff_t _srcstride,
982  int width, int height, int mx, int my,
983  int16_t* mcbuffer)
984 {
985  int x, y;
986  pixel *src = (pixel *)_src;
987  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
988  const int8_t *filter = ff_hevc_epel_filters[my - 1];
989  int8_t filter_0 = filter[0];
990  int8_t filter_1 = filter[1];
991  int8_t filter_2 = filter[2];
992  int8_t filter_3 = filter[3];
993 
994  for (y = 0; y < height; y++) {
995  for (x = 0; x < width; x++)
996  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
997  src += srcstride;
998  dst += dststride;
999  }
1000 }
1001 
1002 static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
1003  uint8_t *_src, ptrdiff_t _srcstride,
1004  int width, int height, int mx, int my,
1005  int16_t* mcbuffer)
1006 {
1007  int x, y;
1008  pixel *src = (pixel *)_src;
1009  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1010  const int8_t *filter_h = ff_hevc_epel_filters[mx - 1];
1011  const int8_t *filter_v = ff_hevc_epel_filters[my - 1];
1012  int8_t filter_0 = filter_h[0];
1013  int8_t filter_1 = filter_h[1];
1014  int8_t filter_2 = filter_h[2];
1015  int8_t filter_3 = filter_h[3];
1016  int16_t tmp_array[(MAX_PB_SIZE + 3) * MAX_PB_SIZE];
1017  int16_t *tmp = tmp_array;
1018 
1019  src -= EPEL_EXTRA_BEFORE * srcstride;
1020 
1021  for (y = 0; y < height + EPEL_EXTRA; y++) {
1022  for (x = 0; x < width; x++)
1023  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1024  src += srcstride;
1025  tmp += MAX_PB_SIZE;
1026  }
1027 
1028  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1029  filter_0 = filter_v[0];
1030  filter_1 = filter_v[1];
1031  filter_2 = filter_v[2];
1032  filter_3 = filter_v[3];
1033  for (y = 0; y < height; y++) {
1034  for (x = 0; x < width; x++)
1035  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1036  tmp += MAX_PB_SIZE;
1037  dst += dststride;
1038  }
1039 }
1040 
1041 static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
1042  int16_t *src, ptrdiff_t srcstride,
1043  int width, int height)
1044 {
1045  int x, y;
1046  pixel *dst = (pixel *)_dst;
1047  ptrdiff_t dststride = _dststride / sizeof(pixel);
1048 
1049  int shift = 14 - BIT_DEPTH;
1050 #if BIT_DEPTH < 14
1051  int offset = 1 << (shift - 1);
1052 #else
1053  int offset = 0;
1054 #endif
1055  for (y = 0; y < height; y++) {
1056  for (x = 0; x < width; x++)
1057  dst[x] = av_clip_pixel((src[x] + offset) >> shift);
1058  dst += dststride;
1059  src += srcstride;
1060  }
1061 }
1062 
1063 static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
1064  int16_t *src1, int16_t *src2,
1065  ptrdiff_t srcstride,
1066  int width, int height)
1067 {
1068  int x, y;
1069  pixel *dst = (pixel *)_dst;
1070  ptrdiff_t dststride = _dststride / sizeof(pixel);
1071 
1072  int shift = 14 + 1 - BIT_DEPTH;
1073 #if BIT_DEPTH < 14
1074  int offset = 1 << (shift - 1);
1075 #else
1076  int offset = 0;
1077 #endif
1078 
1079  for (y = 0; y < height; y++) {
1080  for (x = 0; x < width; x++)
1081  dst[x] = av_clip_pixel((src1[x] + src2[x] + offset) >> shift);
1082  dst += dststride;
1083  src1 += srcstride;
1084  src2 += srcstride;
1085  }
1086 }
1087 
1088 static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
1089  uint8_t *_dst, ptrdiff_t _dststride,
1090  int16_t *src, ptrdiff_t srcstride,
1091  int width, int height)
1092 {
1093  int shift, log2Wd, wx, ox, x, y, offset;
1094  pixel *dst = (pixel *)_dst;
1095  ptrdiff_t dststride = _dststride / sizeof(pixel);
1096 
1097  shift = 14 - BIT_DEPTH;
1098  log2Wd = denom + shift;
1099  offset = 1 << (log2Wd - 1);
1100  wx = wlxFlag;
1101  ox = olxFlag * (1 << (BIT_DEPTH - 8));
1102 
1103  for (y = 0; y < height; y++) {
1104  for (x = 0; x < width; x++) {
1105  if (log2Wd >= 1) {
1106  dst[x] = av_clip_pixel(((src[x] * wx + offset) >> log2Wd) + ox);
1107  } else {
1108  dst[x] = av_clip_pixel(src[x] * wx + ox);
1109  }
1110  }
1111  dst += dststride;
1112  src += srcstride;
1113  }
1114 }
1115 
1116 static void FUNC(weighted_pred_avg)(uint8_t denom,
1117  int16_t wl0Flag, int16_t wl1Flag,
1118  int16_t ol0Flag, int16_t ol1Flag,
1119  uint8_t *_dst, ptrdiff_t _dststride,
1120  int16_t *src1, int16_t *src2,
1121  ptrdiff_t srcstride,
1122  int width, int height)
1123 {
1124  int shift, log2Wd, w0, w1, o0, o1, x, y;
1125  pixel *dst = (pixel *)_dst;
1126  ptrdiff_t dststride = _dststride / sizeof(pixel);
1127 
1128  shift = 14 - BIT_DEPTH;
1129  log2Wd = denom + shift;
1130  w0 = wl0Flag;
1131  w1 = wl1Flag;
1132  o0 = ol0Flag * (1 << (BIT_DEPTH - 8));
1133  o1 = ol1Flag * (1 << (BIT_DEPTH - 8));
1134 
1135  for (y = 0; y < height; y++) {
1136  for (x = 0; x < width; x++)
1137  dst[x] = av_clip_pixel((src1[x] * w0 + src2[x] * w1 +
1138  ((o0 + o1 + 1) << log2Wd)) >> (log2Wd + 1));
1139  dst += dststride;
1140  src1 += srcstride;
1141  src2 += srcstride;
1142  }
1143 }
1144 
/*
 * Sample accessors for the deblocking filters. They expand to lvalues and
 * expect `pix`, `xstride` and (for the T* variants) `ystride` to be in scope.
 * `pix` addresses Q0; the P samples sit at negative multiples of xstride and
 * the Q samples at non-negative ones.
 */

// line zero
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three (offset by 3 * ystride), used only for the deblocking decision
#define TP3 pix[3 * ystride - 4 * xstride]
#define TP2 pix[3 * ystride - 3 * xstride]
#define TP1 pix[3 * ystride - 2 * xstride]
#define TP0 pix[3 * ystride - xstride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[3 * ystride + xstride]
#define TQ2 pix[3 * ystride + 2 * xstride]
#define TQ3 pix[3 * ystride + 3 * xstride]
1164 
1166  ptrdiff_t _xstride, ptrdiff_t _ystride,
1167  int *_beta, int *_tc,
1168  uint8_t *_no_p, uint8_t *_no_q)
1169 {
1170  int d, j;
1171  pixel *pix = (pixel *)_pix;
1172  ptrdiff_t xstride = _xstride / sizeof(pixel);
1173  ptrdiff_t ystride = _ystride / sizeof(pixel);
1174 
1175  for (j = 0; j < 2; j++) {
1176  const int dp0 = abs(P2 - 2 * P1 + P0);
1177  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1178  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1179  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1180  const int d0 = dp0 + dq0;
1181  const int d3 = dp3 + dq3;
1182  const int beta = _beta[j] << (BIT_DEPTH - 8);
1183  const int tc = _tc[j] << (BIT_DEPTH - 8);
1184  const int no_p = _no_p[j];
1185  const int no_q = _no_q[j];
1186 
1187  if (d0 + d3 >= beta) {
1188  pix += 4 * ystride;
1189  continue;
1190  } else {
1191  const int beta_3 = beta >> 3;
1192  const int beta_2 = beta >> 2;
1193  const int tc25 = ((tc * 5 + 1) >> 1);
1194 
1195  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1196  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1197  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1198  // strong filtering
1199  const int tc2 = tc << 1;
1200  for (d = 0; d < 4; d++) {
1201  const int p3 = P3;
1202  const int p2 = P2;
1203  const int p1 = P1;
1204  const int p0 = P0;
1205  const int q0 = Q0;
1206  const int q1 = Q1;
1207  const int q2 = Q2;
1208  const int q3 = Q3;
1209  if (!no_p) {
1210  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1211  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1212  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1213  }
1214  if (!no_q) {
1215  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1216  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1217  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1218  }
1219  pix += ystride;
1220  }
1221  } else { // normal filtering
1222  int nd_p = 1;
1223  int nd_q = 1;
1224  const int tc_2 = tc >> 1;
1225  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1226  nd_p = 2;
1227  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1228  nd_q = 2;
1229 
1230  for (d = 0; d < 4; d++) {
1231  const int p2 = P2;
1232  const int p1 = P1;
1233  const int p0 = P0;
1234  const int q0 = Q0;
1235  const int q1 = Q1;
1236  const int q2 = Q2;
1237  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1238  if (abs(delta0) < 10 * tc) {
1239  delta0 = av_clip(delta0, -tc, tc);
1240  if (!no_p)
1241  P0 = av_clip_pixel(p0 + delta0);
1242  if (!no_q)
1243  Q0 = av_clip_pixel(q0 - delta0);
1244  if (!no_p && nd_p > 1) {
1245  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1246  P1 = av_clip_pixel(p1 + deltap1);
1247  }
1248  if (!no_q && nd_q > 1) {
1249  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1250  Q1 = av_clip_pixel(q1 + deltaq1);
1251  }
1252  }
1253  pix += ystride;
1254  }
1255  }
1256  }
1257  }
1258 }
1259 
1260 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1261  ptrdiff_t _ystride, int *_tc,
1262  uint8_t *_no_p, uint8_t *_no_q)
1263 {
1264  int d, j, no_p, no_q;
1265  pixel *pix = (pixel *)_pix;
1266  ptrdiff_t xstride = _xstride / sizeof(pixel);
1267  ptrdiff_t ystride = _ystride / sizeof(pixel);
1268 
1269  for (j = 0; j < 2; j++) {
1270  const int tc = _tc[j] << (BIT_DEPTH - 8);
1271  if (tc <= 0) {
1272  pix += 4 * ystride;
1273  continue;
1274  }
1275  no_p = _no_p[j];
1276  no_q = _no_q[j];
1277 
1278  for (d = 0; d < 4; d++) {
1279  int delta0;
1280  const int p1 = P1;
1281  const int p0 = P0;
1282  const int q0 = Q0;
1283  const int q1 = Q1;
1284  delta0 = av_clip((((q0 - p0) << 2) + p1 - q1 + 4) >> 3, -tc, tc);
1285  if (!no_p)
1286  P0 = av_clip_pixel(p0 + delta0);
1287  if (!no_q)
1288  Q0 = av_clip_pixel(q0 - delta0);
1289  pix += ystride;
1290  }
1291  }
1292 }
1293 
1294 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1295  int *tc, uint8_t *no_p,
1296  uint8_t *no_q)
1297 {
1298  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1299 }
1300 
1301 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1302  int *tc, uint8_t *no_p,
1303  uint8_t *no_q)
1304 {
1305  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1306 }
1307 
1308 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1309  int *beta, int *tc, uint8_t *no_p,
1310  uint8_t *no_q)
1311 {
1312  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1313  beta, tc, no_p, no_q);
1314 }
1315 
1316 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1317  int *beta, int *tc, uint8_t *no_p,
1318  uint8_t *no_q)
1319 {
1320  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1321  beta, tc, no_p, no_q);
1322 }
1323 
1324 #undef P3
1325 #undef P2
1326 #undef P1
1327 #undef P0
1328 #undef Q0
1329 #undef Q1
1330 #undef Q2
1331 #undef Q3
1332 
1333 #undef TP3
1334 #undef TP2
1335 #undef TP1
1336 #undef TP0
1337 #undef TQ0
1338 #undef TQ1
1339 #undef TQ2
1340 #undef TQ3
static void FUNC() weighted_pred_avg(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag, int16_t ol0Flag, int16_t ol1Flag, uint8_t *_dst, ptrdiff_t _dststride, int16_t *src1, int16_t *src2, ptrdiff_t srcstride, int width, int height)
static const int16_t coeffs[28]
#define TR_16(dst, src, dstep, sstep, assign)
int size
static void FUNC() transform_32x32_add(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
static void FUNC() sao_edge_filter_3(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:240
static void FUNC() put_hevc_epel_h(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my, int16_t *mcbuffer)
#define P1
#define PUT_HEVC_QPEL_HV(H, V)
#define Q0
#define BIT_DEPTH
static void FUNC() sao_edge_filter_1(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
static void FUNC() transquant_bypass16x16(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
static void FUNC() sao_band_filter_1(uint8_t *dst, uint8_t *src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, int c_idx)
#define MAX_PB_SIZE
Definition: hevc.h:58
static void FUNC() transquant_bypass4x4(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
int stride
Definition: mace.c:144
static void FUNC() hevc_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int *beta, int *tc, uint8_t *no_p, uint8_t *no_q)
#define av_clip_pixel(a)
#define Q3
static void FUNC() hevc_loop_filter_luma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
#define TQ1
#define P0
uint8_t
#define TQ2
bitstream reader API header.
#define EPEL_EXTRA_BEFORE
Definition: hevc.h:70
#define TP3
static void FUNC() hevc_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q)
static void FUNC() put_hevc_qpel_pixels(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int16_t *mcbuffer)
#define P3
static void FUNC() transform_4x4_luma_add(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
#define FUNC(a)
#define TR_8(dst, src, dstep, sstep, assign)
static void FUNC() put_pcm(uint8_t *_dst, ptrdiff_t stride, int size, GetBitContext *gb, int pcm_bit_depth)
#define TR_32(dst, src, dstep, sstep, assign)
static void FUNC() sao_band_filter(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, int c_idx, int class)
#define TP2
static void FUNC() hevc_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int *beta, int *tc, uint8_t *no_p, uint8_t *no_q)
#define EPEL_FILTER(src, stride)
static void FUNC() weighted_pred(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, uint8_t *_dst, ptrdiff_t _dststride, int16_t *src, ptrdiff_t srcstride, int width, int height)
static void filter(MpegAudioContext *s, int ch, const short *samples, int incr)
Definition: mpegaudioenc.c:307
static void FUNC() transquant_bypass32x32(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
static void FUNC() put_unweighted_pred(uint8_t *_dst, ptrdiff_t _dststride, int16_t *src, ptrdiff_t srcstride, int width, int height)
#define SCALE(dst, x)
static void FUNC() transform_16x16_add(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
#define pixel
int offset_val[3][5]
SaoOffsetVal.
Definition: hevcdsp.h:36
static int width
Definition: utils.c:156
static void FUNC() put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my, int16_t *mcbuffer)
#define Q1
#define TP0
#define PUT_HEVC_QPEL_H(H)
static void FUNC() hevc_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
#define TR_4(dst, src, dstep, sstep, assign)
static void FUNC() sao_edge_filter_2(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
static void FUNC() transform_8x8_add(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
static void FUNC() transquant_bypass8x8(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
#define ADD_AND_SCALE(dst, x)
#define EPEL_EXTRA
Definition: hevc.h:72
static void FUNC() put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my, int16_t *mcbuffer)
static void FUNC() sao_band_filter_3(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, int c_idx)
static void FUNC() transform_skip(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
static void FUNC() sao_band_filter_0(uint8_t *dst, uint8_t *src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, int c_idx)
static void FUNC() put_weighted_pred_avg(uint8_t *_dst, ptrdiff_t _dststride, int16_t *src1, int16_t *src2, ptrdiff_t srcstride, int width, int height)
int height
Definition: gxfenc.c:72
#define CMP(a, b)
#define PUT_HEVC_QPEL_V(V)
#define TP1
static void FUNC() sao_band_filter_2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, SAOParams *sao, int *borders, int width, int height, int c_idx)
#define TR_4x4_LUMA(dst, src, step, assign)
static void FUNC() put_hevc_epel_pixels(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my, int16_t *mcbuffer)
#define P2
static const uint8_t offset_table[]
Definition: escape130.c:40
#define TQ3
static void FUNC() hevc_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q)
static void FUNC() sao_edge_filter_0(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
static void FUNC() transform_4x4_add(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride)
#define TQ0
#define Q2
const int8_t ff_hevc_epel_filters[7][16]
Definition: hevcdsp.c:92