swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <assert.h>
22 #include <inttypes.h>
23 #include <math.h>
24 #include <stdio.h>
25 #include <string.h>
26 
27 #include "libavutil/avutil.h"
28 #include "libavutil/bswap.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/pixdesc.h"
33 #include "config.h"
34 #include "rgb2rgb.h"
35 #include "swscale_internal.h"
36 #include "swscale.h"
37 
/* Rows of an 8x8 table of byte values in the range 0..126.
 * NOTE(review): the declaration line that opens this initializer is missing
 * from this listing; presumably this is dither_8x8_128, which swScale()
 * indexes with (line & 7) -- confirm against the full file. */
39  { 36, 68, 60, 92, 34, 66, 58, 90, },
40  { 100, 4, 124, 28, 98, 2, 122, 26, },
41  { 52, 84, 44, 76, 50, 82, 42, 74, },
42  { 116, 20, 108, 12, 114, 18, 106, 10, },
43  { 32, 64, 56, 88, 38, 70, 62, 94, },
44  { 96, 0, 120, 24, 102, 6, 126, 30, },
45  { 48, 80, 40, 72, 54, 86, 46, 78, },
46  { 112, 16, 104, 8, 118, 22, 110, 14, },
47 };
48 
/* Initializer of ff_sws_pb_64: eight bytes, each holding the value 64.
 * NOTE(review): the declaration line (listing line 49) is missing here. */
50  64, 64, 64, 64, 64, 64, 64, 64
51 };
52 
/**
 * Set a rectangular region of a byte plane to a constant value.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of consecutive rows
 * @param width  number of bytes written in each row
 * @param height number of rows written
 * @param y      index of the first row written
 * @param val    byte stored at every written position
 */
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
                                       int height, int y, uint8_t val)
{
    uint8_t *row  = plane + stride * y;
    int rows_left = height;

    while (rows_left-- > 0) {
        memset(row, val, width);
        row += stride;
    }
}
63 
/**
 * Fill a rectangular region of a plane of 16-bit samples with an 8-bit value
 * widened to dst_depth bits.
 *
 * The widened sample is (val << (dst_depth - 8)) | (val >> (16 - dst_depth)),
 * e.g. 255 becomes 511 for a depth of 9 and 1023 for a depth of 10.
 *
 * @param plane      start of the plane
 * @param stride     distance in bytes between the starts of consecutive rows
 * @param width      number of 16-bit samples written in each row
 * @param height     number of rows written
 * @param y          index of the first row written
 * @param val        8-bit value to widen and store
 * @param dst_depth  bit depth of the destination samples
 * @param big_endian non-zero to store samples big-endian, zero for
 *                   little-endian
 */
static void fill_plane9or10(uint8_t *plane, int stride, int width,
                            int height, int y, uint8_t val,
                            const int dst_depth, const int big_endian)
{
    int i, j;
    uint16_t *dst = (uint16_t *) (plane + stride * y);
/* Write the widened sample to every position of the region through wfunc,
 * a 16-bit store macro of the wanted byte order. */
#define FILL8TO9_OR_10(wfunc)                                   \
    for (i = 0; i < height; i++) {                              \
        for (j = 0; j < width; j++) {                           \
            wfunc(&dst[j], (val << (dst_depth - 8)) |           \
                           (val >> (16 - dst_depth)));          \
        }                                                       \
        dst += stride / 2; /* stride is in bytes, dst in samples */ \
    }
    if (big_endian) {
        FILL8TO9_OR_10(AV_WB16);
    } else {
        FILL8TO9_OR_10(AV_WL16);
    }
}
84 
85 
86 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
87  const uint8_t *_src, const int16_t *filter,
88  const int32_t *filterPos, int filterSize)
89 {
91  int i;
92  int32_t *dst = (int32_t *) _dst;
93  const uint16_t *src = (const uint16_t *) _src;
94  int bits = desc->comp[0].depth_minus1;
95  int sh = bits - 4;
96 
97  for (i = 0; i < dstW; i++) {
98  int j;
99  int srcPos = filterPos[i];
100  int val = 0;
101 
102  for (j = 0; j < filterSize; j++) {
103  val += src[srcPos + j] * filter[filterSize * i + j];
104  }
105  // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
106  dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
107  }
108 }
109 
110 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
111  const uint8_t *_src, const int16_t *filter,
112  const int32_t *filterPos, int filterSize)
113 {
115  int i;
116  const uint16_t *src = (const uint16_t *) _src;
117  int sh = desc->comp[0].depth_minus1;
118 
119  for (i = 0; i < dstW; i++) {
120  int j;
121  int srcPos = filterPos[i];
122  int val = 0;
123 
124  for (j = 0; j < filterSize; j++) {
125  val += src[srcPos + j] * filter[filterSize * i + j];
126  }
127  // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
128  dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
129  }
130 }
131 
132 // bilinear / bicubic scaling
133 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
134  const uint8_t *src, const int16_t *filter,
135  const int32_t *filterPos, int filterSize)
136 {
137  int i;
138  for (i = 0; i < dstW; i++) {
139  int j;
140  int srcPos = filterPos[i];
141  int val = 0;
142  for (j = 0; j < filterSize; j++) {
143  val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
144  }
145  dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
146  }
147 }
148 
149 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW,
150  const uint8_t *src, const int16_t *filter,
151  const int32_t *filterPos, int filterSize)
152 {
153  int i;
154  int32_t *dst = (int32_t *) _dst;
155  for (i = 0; i < dstW; i++) {
156  int j;
157  int srcPos = filterPos[i];
158  int val = 0;
159  for (j = 0; j < filterSize; j++) {
160  val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
161  }
162  dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
163  }
164 }
165 
166 // FIXME all pal and rgb srcFormats could do this conversion as well
167 // FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Convert a line of 15-bit chroma intermediates in place with the
 * to-JPEG-range mapping (x * 4663 - 9289992) >> 12.
 *
 * Inputs are clamped to 30775 first, which is the value that maps to 32767.
 *
 * @param dstU  U samples, converted in place
 * @param dstV  V samples, converted in place
 * @param width number of samples in each line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int u = FFMIN(dstU[n], 30775);
        dstU[n] = (u * 4663 - 9289992) >> 12; // -264

        const int v = FFMIN(dstV[n], 30775);
        dstV[n] = (v * 4663 - 9289992) >> 12; // -264
    }
}
176 
/**
 * Convert a line of 15-bit chroma intermediates in place with the
 * from-JPEG-range mapping (x * 1799 + 4081085) >> 11.
 *
 * @param dstU  U samples, converted in place
 * @param dstV  V samples, converted in place
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n = 0;

    while (n < width) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; // 1469
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11; // 1469
        n++;
    }
}
185 
/**
 * Convert a line of 15-bit luma intermediates in place with the
 * to-JPEG-range mapping (x * 19077 - 39057361) >> 14.
 *
 * Inputs are clamped to 30189 first, which is the value that maps to 32767.
 *
 * @param dst   luma samples, converted in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int y = FFMIN(dst[n], 30189);
        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
192 
/**
 * Convert a line of 15-bit luma intermediates in place with the
 * from-JPEG-range mapping (x * 14071 + 33561947) >> 14.
 *
 * @param dst   luma samples, converted in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n = 0;

    while (n < width) {
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
        n++;
    }
}
199 
/**
 * Convert a line of 19-bit chroma intermediates in place with the
 * to-JPEG-range mapping (x * 4663 - (9289992 << 4)) >> 12.
 *
 * Inputs are clamped to 30775 << 4 first. At that clamp value the product
 * with 4663 is 2296061200, which does not fit in a 32-bit int, so the
 * arithmetic is carried out in 64 bits; the shifted result fits in 32 bits.
 *
 * @param _dstU U samples; accessed as int32_t despite the pointer type
 * @param _dstV V samples; accessed as int32_t despite the pointer type
 * @param width number of samples in each line
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    for (i = 0; i < width; i++) {
        dstU[i] = (int32_t) (((int64_t) FFMIN(dstU[i], 30775 << 4) * 4663 -
                              (9289992 << 4)) >> 12); // -264
        dstV[i] = (int32_t) (((int64_t) FFMIN(dstV[i], 30775 << 4) * 4663 -
                              (9289992 << 4)) >> 12); // -264
    }
}
210 
/**
 * Convert a line of 19-bit chroma intermediates in place with the
 * from-JPEG-range mapping (x * 1799 + (4081085 << 4)) >> 11.
 *
 * @param _dstU U samples; accessed as int32_t despite the pointer type
 * @param _dstV V samples; accessed as int32_t despite the pointer type
 * @param width number of samples in each line
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *const u = (int32_t *) _dstU;
    int32_t *const v = (int32_t *) _dstV;
    int n = 0;

    while (n < width) {
        u[n] = (u[n] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[n] = (v[n] * 1799 + (4081085 << 4)) >> 11; // 1469
        n++;
    }
}
221 
/**
 * Convert a line of 19-bit luma intermediates in place with the
 * to-JPEG-range mapping (x * 4769 - (39057361 << 2)) >> 12.
 *
 * Inputs are clamped to 30189 << 4 first. At that clamp value the product
 * with 4769 is 2303541456, which does not fit in a 32-bit int, so the
 * arithmetic is carried out in 64 bits; the shifted result fits in 32 bits.
 *
 * @param _dst  luma samples; accessed as int32_t despite the pointer type
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (int32_t) (((int64_t) FFMIN(dst[i], 30189 << 4) * 4769 -
                             (39057361 << 2)) >> 12);
}
229 
/**
 * Convert a line of 19-bit luma intermediates in place with the
 * from-JPEG-range mapping (x * 14071 + (33561947 << 4)) >> 14.
 *
 * The product x * 14071 exceeds the range of a 32-bit int for inputs above
 * roughly 114454, which is well inside the 19-bit input range, so the
 * arithmetic is carried out in 64 bits; the shifted result fits in 32 bits.
 *
 * @param _dst  luma samples; accessed as int32_t despite the pointer type
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    for (i = 0; i < width; i++)
        dst[i] = (int32_t) (((int64_t) dst[i] * 14071 +
                             (33561947 << 4)) >> 14);
}
237 
238 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
239  const uint8_t *src, int srcW, int xInc)
240 {
241  int i;
242  unsigned int xpos = 0;
243  for (i = 0; i < dstWidth; i++) {
244  register unsigned int xx = xpos >> 16;
245  register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
246  dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
247  xpos += xInc;
248  }
249 }
250 
251 // *** horizontal scale Y line to temp buffer
252 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
253  const uint8_t *src_in[4],
254  int srcW, int xInc,
255  const int16_t *hLumFilter,
256  const int32_t *hLumFilterPos,
257  int hLumFilterSize,
259  uint32_t *pal, int isAlpha)
260 {
261  void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) =
262  isAlpha ? c->alpToYV12 : c->lumToYV12;
263  void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
264  const uint8_t *src = src_in[isAlpha ? 3 : 0];
265 
266  if (toYV12) {
267  toYV12(formatConvBuffer, src, srcW, pal);
268  src = formatConvBuffer;
269  } else if (c->readLumPlanar && !isAlpha) {
270  c->readLumPlanar(formatConvBuffer, src_in, srcW);
271  src = formatConvBuffer;
272  }
273 
274  if (!c->hyscale_fast) {
275  c->hyScale(c, dst, dstWidth, src, hLumFilter,
276  hLumFilterPos, hLumFilterSize);
277  } else { // fast bilinear upscale / crap downscale
278  c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
279  }
280 
281  if (convertRange)
282  convertRange(dst, dstWidth);
283 }
284 
285 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
286  int dstWidth, const uint8_t *src1,
287  const uint8_t *src2, int srcW, int xInc)
288 {
289  int i;
290  unsigned int xpos = 0;
291  for (i = 0; i < dstWidth; i++) {
292  register unsigned int xx = xpos >> 16;
293  register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
294  dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
295  dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
296  xpos += xInc;
297  }
298 }
299 
300 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
301  int16_t *dst2, int dstWidth,
302  const uint8_t *src_in[4],
303  int srcW, int xInc,
304  const int16_t *hChrFilter,
305  const int32_t *hChrFilterPos,
306  int hChrFilterSize,
307  uint8_t *formatConvBuffer, uint32_t *pal)
308 {
309  const uint8_t *src1 = src_in[1], *src2 = src_in[2];
310  if (c->chrToYV12) {
311  uint8_t *buf2 = formatConvBuffer +
312  FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
313  c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
314  src1 = formatConvBuffer;
315  src2 = buf2;
316  } else if (c->readChrPlanar) {
317  uint8_t *buf2 = formatConvBuffer +
318  FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
319  c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
320  src1 = formatConvBuffer;
321  src2 = buf2;
322  }
323 
324  if (!c->hcscale_fast) {
325  c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
326  c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
327  } else { // fast bilinear upscale / crap downscale
328  c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
329  }
330 
331  if (c->chrConvertRange)
332  c->chrConvertRange(dst1, dst2, dstWidth);
333 }
334 
/* Set to 1 to log the ring-buffer bookkeeping done by swScale(). */
#define DEBUG_SWSCALE_BUFFERS 0
/* Log through av_log() at debug level when DEBUG_SWSCALE_BUFFERS is non-zero.
 * Expects a variable named c to be in scope at the point of use. Wrapped in
 * do { } while (0) so that the macro behaves as a single statement and cannot
 * capture an else that follows it. */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
339 
/*
 * Scale one slice of source lines.
 *
 * Source lines of the slice are scaled horizontally into the luma, chroma
 * and alpha line buffers of the context; every destination line whose
 * vertical filter input is then available is produced by the vertical
 * output functions. Progress (dstY) is stored back into the context so that
 * a later call can continue with the next slice.
 *
 * c          scaler context
 * src        source plane pointers; for packed source formats entries 1..3
 *            are overwritten with entry 0
 * srcStride  source strides; modified in place (packed aliasing, vChrDrop)
 * srcSliceY  index of the first source line contained in the slice
 * srcSliceH  number of source lines in the slice
 * dst        destination plane pointers
 * dstStride  destination strides
 *
 * Returns the number of destination lines advanced by this call
 * (dstY - lastDstY).
 *
 * NOTE(review): several declarations used below (among them vLumFilterPos,
 * vChrFilterPos, hLumFilterPos, hChrFilterPos, lumMmxFilter, chrMmxFilter,
 * formatConvBuffer and the yuv2* output function pointers) are missing from
 * this listing, as are a few statements; gaps in the listing line numbers
 * mark the places.
 */
340 static int swScale(SwsContext *c, const uint8_t *src[],
341  int srcStride[], int srcSliceY,
342  int srcSliceH, uint8_t *dst[], int dstStride[])
343 {
344  /* load a few things into local vars to make the code more readable?
345  * and faster */
346  const int srcW = c->srcW;
347  const int dstW = c->dstW;
348  const int dstH = c->dstH;
349  const int chrDstW = c->chrDstW;
350  const int chrSrcW = c->chrSrcW;
351  const int lumXInc = c->lumXInc;
352  const int chrXInc = c->chrXInc;
353  const enum AVPixelFormat dstFormat = c->dstFormat;
354  const int flags = c->flags;
359  int16_t *vLumFilter = c->vLumFilter;
360  int16_t *vChrFilter = c->vChrFilter;
361  int16_t *hLumFilter = c->hLumFilter;
362  int16_t *hChrFilter = c->hChrFilter;
365  const int vLumFilterSize = c->vLumFilterSize;
366  const int vChrFilterSize = c->vChrFilterSize;
367  const int hLumFilterSize = c->hLumFilterSize;
368  const int hChrFilterSize = c->hChrFilterSize;
369  int16_t **lumPixBuf = c->lumPixBuf;
370  int16_t **chrUPixBuf = c->chrUPixBuf;
371  int16_t **chrVPixBuf = c->chrVPixBuf;
372  int16_t **alpPixBuf = c->alpPixBuf;
373  const int vLumBufSize = c->vLumBufSize;
374  const int vChrBufSize = c->vChrBufSize;
376  uint32_t *pal = c->pal_yuv;
 /* Slice position and height in chroma lines; the height is rounded up. */
383  const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
384  const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample);
385  int should_dither = is9_OR_10BPS(c->srcFormat) ||
386  is16BPS(c->srcFormat);
387  int lastDstY;
388 
389  /* vars which will change and which we need to store back in the context */
390  int dstY = c->dstY;
391  int lumBufIndex = c->lumBufIndex;
392  int chrBufIndex = c->chrBufIndex;
393  int lastInLumBuf = c->lastInLumBuf;
394  int lastInChrBuf = c->lastInChrBuf;
395 
 /* Packed source: make all four plane pointers and strides refer to plane 0.
  * This writes to the caller's src[] and srcStride[] arrays. */
396  if (isPacked(c->srcFormat)) {
397  src[0] =
398  src[1] =
399  src[2] =
400  src[3] = src[0];
401  srcStride[0] =
402  srcStride[1] =
403  srcStride[2] =
404  srcStride[3] = srcStride[0];
405  }
 /* Scale the chroma strides by 2^vChrDrop (also written to the caller's array). */
406  srcStride[1] <<= c->vChrDrop;
407  srcStride[2] <<= c->vChrDrop;
408 
409  DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
410  src[0], srcStride[0], src[1], srcStride[1],
411  src[2], srcStride[2], src[3], srcStride[3],
412  dst[0], dstStride[0], dst[1], dstStride[1],
413  dst[2], dstStride[2], dst[3], dstStride[3]);
414  DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
415  srcSliceY, srcSliceH, dstY, dstH);
416  DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
417  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
418 
 /* Warn once per process (static flag) when a destination stride is not a
  * multiple of 8 and SWS_PRINT_INFO is set. */
419  if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
420  dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
421  static int warnedAlready = 0; // FIXME maybe move this into the context
422  if (flags & SWS_PRINT_INFO && !warnedAlready) {
424  "Warning: dstStride is not aligned!\n"
425  " ->cannot do aligned memory accesses anymore\n");
426  warnedAlready = 1;
427  }
428  }
429 
430  /* Note the user might start scaling the picture in the middle so this
431  * will not get executed. This is not really intended but works
432  * currently, so people might do it. */
433  if (srcSliceY == 0) {
434  lumBufIndex = -1;
435  chrBufIndex = -1;
436  dstY = 0;
437  lastInLumBuf = -1;
438  lastInChrBuf = -1;
439  }
440 
441  if (!should_dither) {
 /* NOTE(review): the statement at listing line 442 is missing from this listing. */
443  }
 /* Remember where this call started so the number of lines output can be returned. */
444  lastDstY = dstY;
445 
 /* Produce destination lines until dstH is reached or the slice runs out of input. */
446  for (; dstY < dstH; dstY++) {
447  const int chrDstY = dstY >> c->chrDstVSubSample;
448  uint8_t *dest[4] = {
449  dst[0] + dstStride[0] * dstY,
450  dst[1] + dstStride[1] * chrDstY,
451  dst[2] + dstStride[2] * chrDstY,
452  (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
453  };
454 
455  // First line needed as input
456  const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
457  const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
458  // First line needed as input
459  const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
460 
461  // Last line needed as input
462  int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
463  int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
464  int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
465  int enough_lines;
466 
467  // handle holes (FAST_BILINEAR & weird filters)
468  if (firstLumSrcY > lastInLumBuf)
469  lastInLumBuf = firstLumSrcY - 1;
470  if (firstChrSrcY > lastInChrBuf)
471  lastInChrBuf = firstChrSrcY - 1;
472  assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
473  assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
474 
475  DEBUG_BUFFERS("dstY: %d\n", dstY);
476  DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
477  firstLumSrcY, lastLumSrcY, lastInLumBuf);
478  DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
479  firstChrSrcY, lastChrSrcY, lastInChrBuf);
480 
481  // Do we have enough lines in this slice to output the dstY line
482  enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
483  lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
484 
 /* Not enough input: still buffer every remaining line of the slice, then
  * leave the loop further down. */
485  if (!enough_lines) {
486  lastLumSrcY = srcSliceY + srcSliceH - 1;
487  lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
488  DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
489  lastLumSrcY, lastChrSrcY);
490  }
491 
492  // Do horizontal scaling
493  while (lastInLumBuf < lastLumSrcY) {
494  const uint8_t *src1[4] = {
495  src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
496  src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
497  src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
498  src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
499  };
500  lumBufIndex++;
501  assert(lumBufIndex < 2 * vLumBufSize);
502  assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
503  assert(lastInLumBuf + 1 - srcSliceY >= 0);
504  hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
505  hLumFilter, hLumFilterPos, hLumFilterSize,
506  formatConvBuffer, pal, 0);
507  if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
508  hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
509  lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
510  formatConvBuffer, pal, 1);
511  lastInLumBuf++;
512  DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
513  lumBufIndex, lastInLumBuf);
514  }
515  while (lastInChrBuf < lastChrSrcY) {
516  const uint8_t *src1[4] = {
517  src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
518  src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
519  src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
520  src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
521  };
522  chrBufIndex++;
523  assert(chrBufIndex < 2 * vChrBufSize);
524  assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
525  assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
526  // FIXME replace parameters through context struct (some at least)
527 
528  if (c->needs_hcscale)
529  hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
530  chrDstW, src1, chrSrcW, chrXInc,
531  hChrFilter, hChrFilterPos, hChrFilterSize,
532  formatConvBuffer, pal);
533  lastInChrBuf++;
534  DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
535  chrBufIndex, lastInChrBuf);
536  }
537  // wrap buf index around to stay inside the ring buffer
538  if (lumBufIndex >= vLumBufSize)
539  lumBufIndex -= vLumBufSize;
540  if (chrBufIndex >= vChrBufSize)
541  chrBufIndex -= vChrBufSize;
542  if (!enough_lines)
543  break; // we can't output a dstY line so let's try with the next slice
544 
545 #if HAVE_MMX_INLINE
546  updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
547  lastInLumBuf, lastInChrBuf);
548 #endif
 /* Select the dither table rows for this output line. */
549  if (should_dither) {
550  c->chrDither8 = dither_8x8_128[chrDstY & 7];
551  c->lumDither8 = dither_8x8_128[dstY & 7];
552  }
553  if (dstY >= dstH - 2) {
554  /* hmm looks like we can't use MMX here without overwriting
555  * this array's tail */
556  ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
557  &yuv2packed1, &yuv2packed2, &yuv2packedX);
558  }
559 
560  {
 /* Pointers to the buffered line holding source line firstLumSrcY /
  * firstChrSrcY, i.e. the first input of the vertical filter. */
561  const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
562  const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
563  const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
564  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
565  (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
566 
 /* The luma filter window reaches outside the picture: build a temporary
  * pointer list, stored behind the first 2 * vLumBufSize entries of the
  * buffer array, in which the first and last available lines are repeated. */
567  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
568  const int16_t **tmpY = (const int16_t **)lumPixBuf +
569  2 * vLumBufSize;
570  int neg = -firstLumSrcY, i;
571  int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
572  for (i = 0; i < neg; i++)
573  tmpY[i] = lumSrcPtr[neg];
574  for (; i < end; i++)
575  tmpY[i] = lumSrcPtr[i];
576  for (; i < vLumFilterSize; i++)
577  tmpY[i] = tmpY[i - 1];
578  lumSrcPtr = tmpY;
579 
580  if (alpSrcPtr) {
581  const int16_t **tmpA = (const int16_t **)alpPixBuf +
582  2 * vLumBufSize;
583  for (i = 0; i < neg; i++)
584  tmpA[i] = alpSrcPtr[neg];
585  for (; i < end; i++)
586  tmpA[i] = alpSrcPtr[i];
587  for (; i < vLumFilterSize; i++)
588  tmpA[i] = tmpA[i - 1];
589  alpSrcPtr = tmpA;
590  }
591  }
 /* Same edge handling for the two chroma pointer lists. */
592  if (firstChrSrcY < 0 ||
593  firstChrSrcY + vChrFilterSize > c->chrSrcH) {
594  const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize,
595  **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize;
596  int neg = -firstChrSrcY, i;
597  int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
598  for (i = 0; i < neg; i++) {
599  tmpU[i] = chrUSrcPtr[neg];
600  tmpV[i] = chrVSrcPtr[neg];
601  }
602  for (; i < end; i++) {
603  tmpU[i] = chrUSrcPtr[i];
604  tmpV[i] = chrVSrcPtr[i];
605  }
606  for (; i < vChrFilterSize; i++) {
607  tmpU[i] = tmpU[i - 1];
608  tmpV[i] = tmpV[i - 1];
609  }
610  chrUSrcPtr = tmpU;
611  chrVSrcPtr = tmpV;
612  }
613 
614  if (isPlanarYUV(dstFormat) ||
615  (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
 /* Chroma is written only on lines whose low chrDstVSubSample bits are zero. */
616  const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
617 
618  if (vLumFilterSize == 1) {
619  yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
620  } else {
621  yuv2planeX(vLumFilter + dstY * vLumFilterSize,
622  vLumFilterSize, lumSrcPtr, dest[0],
623  dstW, c->lumDither8, 0);
624  }
625 
626  if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
627  if (yuv2nv12cX) {
628  yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
629  vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
630  dest[1], chrDstW);
631  } else if (vChrFilterSize == 1) {
632  yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
633  yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
634  } else {
635  yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
636  vChrFilterSize, chrUSrcPtr, dest[1],
637  chrDstW, c->chrDither8, 0);
638  yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
639  vChrFilterSize, chrVSrcPtr, dest[2],
640  chrDstW, c->chrDither8, 3);
641  }
642  }
643 
644  if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
645  if (vLumFilterSize == 1) {
646  yuv2plane1(alpSrcPtr[0], dest[3], dstW,
647  c->lumDither8, 0);
648  } else {
649  yuv2planeX(vLumFilter + dstY * vLumFilterSize,
650  vLumFilterSize, alpSrcPtr, dest[3],
651  dstW, c->lumDither8, 0);
652  }
653  }
654  } else {
 /* Packed output: pick the 1-tap, 2-tap or general writer by filter sizes. */
655  if (c->yuv2packed1 && vLumFilterSize == 1 &&
656  vChrFilterSize <= 2) { // unscaled RGB
657  int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
658  yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
659  alpPixBuf ? *alpSrcPtr : NULL,
660  dest[0], dstW, chrAlpha, dstY);
661  } else if (c->yuv2packed2 && vLumFilterSize == 2 &&
662  vChrFilterSize == 2) { // bilinear upscale RGB
663  int lumAlpha = vLumFilter[2 * dstY + 1];
664  int chrAlpha = vChrFilter[2 * dstY + 1];
665  lumMmxFilter[2] =
666  lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001;
667  chrMmxFilter[2] =
668  chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
669  yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
670  alpPixBuf ? alpSrcPtr : NULL,
671  dest[0], dstW, lumAlpha, chrAlpha, dstY);
672  } else { // general RGB
673  yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
674  lumSrcPtr, vLumFilterSize,
675  vChrFilter + dstY * vChrFilterSize,
676  chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
677  alpSrcPtr, dest[0], dstW, dstY);
678  }
679  }
680  }
681  }
682 
 /* Planar destination with an alpha plane but no buffered alpha input: fill
  * the alpha rows covered by this call with the value 255. */
683  if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) {
684  int length = dstW;
685  int height = dstY - lastDstY;
686  if (is16BPS(c->dstFormat))
687  length *= 2;
688 
689  if (is9_OR_10BPS(dstFormat)) {
690  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
691  fill_plane9or10(dst[3], dstStride[3], length, height, lastDstY,
692  255, desc->comp[3].depth_minus1 + 1,
693  isBE(dstFormat));
694  } else
695  fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
696  }
697 
698 #if HAVE_MMXEXT_INLINE
700  __asm__ volatile ("sfence" ::: "memory");
701 #endif
702  emms_c();
703 
704  /* store changed local vars back in the context */
705  c->dstY = dstY;
710 
 /* Number of destination lines advanced by this call. */
711  return dstY - lastDstY;
712 }
713 
/* Body of a function that installs the C implementations of the scaler
 * callbacks into the context c.
 * NOTE(review): the signature line and several statements (listing lines
 * 714, 716, 718, 722, 728-729, 742-743, 745-746, 750-751, 753-754) are
 * missing from this listing; presumably this is sws_init_swScale_c() --
 * confirm against the full file. */
715 {
717 
719  &c->yuv2nv12cX, &c->yuv2packed1,
720  &c->yuv2packed2, &c->yuv2packedX);
721 
723 
 /* Pick the horizontal scaler from the source and destination bit depths:
  * 8-bit sources use hScale8To15_c when dstBpc <= 10 and hScale8To19_c
  * otherwise; all other sources use hScale16To15_c / hScale16To19_c. */
724  if (c->srcBpc == 8) {
725  if (c->dstBpc <= 10) {
726  c->hyScale = c->hcScale = hScale8To15_c;
727  if (c->flags & SWS_FAST_BILINEAR) {
730  }
731  } else {
732  c->hyScale = c->hcScale = hScale8To19_c;
733  }
734  } else {
735  c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
736  : hScale16To15_c;
737  }
738 
 /* Source and destination ranges differ and the destination is not RGB.
  * NOTE(review): the assignments inside the four branches are missing from
  * this listing; presumably they install the *Range{To,From}Jpeg[16]_c
  * functions defined above -- confirm. */
739  if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
740  if (c->dstBpc <= 10) {
741  if (c->srcRange) {
744  } else {
747  }
748  } else {
749  if (c->srcRange) {
752  } else {
755  }
756  }
757  }
758 
 /* Chroma scaling is needed unless the source or destination is gray or the
  * source is monochrome. */
759  if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
760  srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
761  c->needs_hcscale = 1;
762 }
763 
/* Body of ff_getSwsFunc() (per this listing's index, defined at listing
 * line 764 as SwsFunc ff_getSwsFunc(SwsContext *c)): returns swScale as the
 * main scaler function.
 * NOTE(review): the signature line, the statement at listing line 766 and
 * the statements guarded by HAVE_MMX and HAVE_ALTIVEC are missing from this
 * listing. */
765 {
767 
768  if (HAVE_MMX)
770  if (HAVE_ALTIVEC)
772 
773  return swScale;
774 }
int16_t ** alpPixBuf
Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int chrBufIndex
Index in ring buffer of the last scaled horizontal chroma line from source.
static void lumRangeToJpeg_c(int16_t *dst, int width)
Definition: swscale.c:186
void(* hcscale_fast)(struct SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
Definition: swscale.c:177
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:1435
int chrSrcH
Height of source chroma planes.
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, int lastInLumBuf, int lastInChrBuf)
#define SWS_PRINT_INFO
Definition: swscale.h:70
#define SWS_FAST_BILINEAR
Definition: swscale.h:53
int vChrDrop
Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user...
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
Definition: swscale.c:222
void(* readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
external API header
#define HAVE_ALTIVEC
Definition: config.h:54
int srcRange
0 = MPG YUV range, 1 = JPG YUV range (source image).
const uint8_t * lumDither8
int dstY
Last destination vertical line output from last slice.
int stride
Definition: mace.c:144
#define HAVE_MMX
Definition: config.h:46
void(* readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width)
Functions to read planar input, such as planar RGB, and convert internally to Y/UV.
int srcH
Height of source luma/alpha planes.
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:86
int chrDstVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination i...
void(* yuv2interleavedX_fn)(struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize, const int16_t **chrUSrc, const int16_t **chrVSrc, uint8_t *dest, int dstW)
Write one line of horizontally scaled chroma to interleaved output with multi-point vertical scaling ...
uint8_t bits
Definition: crc.c:31
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:83
uint8_t
static void lumRangeFromJpeg_c(int16_t *dst, int width)
Definition: swscale.c:193
int vChrFilterSize
Vertical filter size for chroma pixels.
int16_t ** lumPixBuf
Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
#define emms_c()
Definition: internal.h:145
int lastInLumBuf
Last scaled horizontal luma/alpha line from source in the ring buffer.
static int flags
Definition: log.c:42
#define isAnyRGB(x)
external api for the swscale stuff
enum AVPixelFormat dstFormat
Destination pixel format.
#define isALPHA(x)
Definition: swscale-test.c:47
av_cold void ff_sws_init_input_funcs(SwsContext *c)
Definition: input.c:671
yuv2packedX_fn yuv2packedX
void(* lumConvertRange)(int16_t *dst, int width)
Color range conversion function for luma plane if needed.
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
#define DEBUG_BUFFERS(...)
Definition: swscale.c:336
int dstH
Height of destination luma/alpha planes.
void(* yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], const int16_t *alpSrc, uint8_t *dest, int dstW, int uvalpha, int y)
Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB output without any additional v...
uint16_t depth_minus1
number of bits in the component minus 1
Definition: pixdesc.h:43
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
Definition: swscale.c:285
int16_t ** chrVPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int32_t * hChrFilterPos
Array of horizontal filter starting positions for each dst[i] for chroma planes.
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
SwsFunc ff_getSwsFunc(SwsContext *c)
Return function pointer to fastest main scaler path function depending on architecture and available ...
Definition: swscale.c:764
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
Definition: swscale.c:168
#define CONFIG_SWSCALE_ALPHA
Definition: config.h:318
yuv2packed1_fn yuv2packed1
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:110
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:146
int vChrBufSize
Number of vertical chroma lines allocated in the ring buffer.
int chrDstW
Width of destination chroma planes.
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
Definition: swscale.c:238
void(* alpToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of alpha plane to YV12 for horizontal scaler.
void(* chrConvertRange)(int16_t *dst1, int16_t *dst2, int width)
Color range conversion function for chroma planes if needed.
int32_t * hLumFilterPos
Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
int hChrFilterSize
Horizontal filter size for chroma pixels.
static void filter(MpegAudioContext *s, int ch, const short *samples, int incr)
Definition: mpegaudioenc.c:318
int dstRange
0 = MPG YUV range, 1 = JPG YUV range (destination image).
void(* yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Write one line of horizontally scaled data to planar output without any additional vertical scaling (...
uint8_t * formatConvBuffer
void(* chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *pal)
Unscaled conversion of chroma planes to YV12 for horizontal scaler.
static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
yuv2planar1_fn yuv2plane1
int vLumBufSize
Number of vertical luma/alpha lines allocated in the ring buffer.
int16_t ** chrUPixBuf
Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
yuv2interleavedX_fn yuv2nv12cX
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hLumFilter, const int32_t *hLumFilterPos, int hLumFilterSize, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha)
Definition: swscale.c:252
int32_t
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
const uint8_t ff_sws_pb_64[8]
Definition: swscale.c:49
int dstW
Width of destination luma/alpha planes.
int needs_hcscale
Set if there are chroma planes to be converted.
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
NULL
Definition: eval.c:52
static int width
Definition: utils.c:156
static int swScale(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
Definition: swscale.c:340
static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt)
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:55
yuv2planarX_fn yuv2planeX
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
#define FILL8TO9_OR_10(wfunc)
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width, int height, int y, uint8_t val)
Definition: swscale.c:53
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
Definition: swscale.c:230
int(* SwsFunc)(struct SwsContext *context, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
byte swapping routines
#define AV_WB16(p, d)
Definition: intreadwrite.h:213
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
#define isGray(x)
Definition: swscale-test.c:38
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:25
int16_t * hLumFilter
Array of horizontal filter coefficients for luma/alpha planes.
static void fill_plane9or10(uint8_t *plane, int stride, int width, int height, int y, uint8_t val, const int dst_depth, const int big_endian)
Definition: swscale.c:64
const uint8_t * chrDither8
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
Definition: swscale.c:200
int lumBufIndex
Index in ring buffer of the last scaled horizontal luma/alpha line from source.
int height
Definition: gxfenc.c:72
Y, 1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb.
Definition: pixfmt.h:75
int lastInChrBuf
Last scaled horizontal chroma line from source in the ring buffer.
yuv2packed2_fn yuv2packed2
void(* yuv2planarX_fn)(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Write one line of horizontally scaled data to planar output with multi-point vertical scaling between input pixels.
void(* yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2], const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], const int16_t *alpSrc[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB output by doing bilinear scaling between two input lines.
Y, 1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb.
Definition: pixfmt.h:74
void(* yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB output by doing multi-point vertical scaling between input pixels.
enum AVPixelFormat srcFormat
Source pixel format.
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
void(* hyscale_fast)(struct SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
Scale one horizontal line of input data using a bilinear filter to produce one line of output data.
const uint8_t dither_8x8_128[8][8]
Definition: swscale.c:38
av_cold void ff_sws_init_swScale_altivec(SwsContext *c)
av_cold void ff_sws_init_output_funcs(SwsContext *c, yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX, yuv2interleavedX_fn *yuv2nv12cX, yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2, yuv2packedX_fn *yuv2packedX)
Definition: output.c:1264
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
Definition: swscale.c:211
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
Definition: swscale.c:305
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hChrFilter, const int32_t *hChrFilterPos, int hChrFilterSize, uint8_t *formatConvBuffer, uint32_t *pal)
Definition: swscale.c:300
int16_t * hChrFilter
Array of horizontal filter coefficients for chroma planes.
#define AV_WL16(p, d)
Definition: intreadwrite.h:225
static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:133
int chrSrcW
Width of source chroma planes.
int srcW
Width of source luma/alpha planes.
int chrSrcVSubSample
Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
AVPixelFormat
Pixel format.
Definition: pixfmt.h:63
uint32_t pal_yuv[256]
static av_cold void sws_init_swScale_c(SwsContext *c)
Definition: swscale.c:714
void(* lumToYV12)(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
Unscaled conversion of luma plane to YV12 for horizontal scaler.
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) line of output data.
if(!(ptr_align%ac->ptr_align)&&samples_align >=aligned_len)
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:149