21 #include "../SDL_internal.h"
30 #define SDL_YUV_SD_THRESHOLD 576
96 int pitches[3] = { 0, 0, 0 };
101 pitches[0] = yuv_pitch;
102 pitches[1] = (pitches[0] + 1) / 2;
103 pitches[2] = (pitches[0] + 1) / 2;
104 planes[0] = (
const Uint8 *)yuv;
105 planes[1] = planes[0] + pitches[0] *
height;
106 planes[2] = planes[1] + pitches[1] * ((
height + 1) / 2);
111 pitches[0] = yuv_pitch;
112 planes[0] = (
const Uint8 *)yuv;
116 pitches[0] = yuv_pitch;
117 pitches[1] = 2 * ((pitches[0] + 1) / 2);
118 planes[0] = (
const Uint8 *)yuv;
119 planes[1] = planes[0] + pitches[0] *
height;
128 *y_stride = pitches[0];
131 *uv_stride = pitches[1];
135 *y_stride = pitches[0];
138 *uv_stride = pitches[1];
142 *y_stride = pitches[0];
145 *uv_stride = pitches[0];
149 *y_stride = pitches[0];
152 *uv_stride = pitches[0];
156 *y_stride = pitches[0];
159 *uv_stride = pitches[0];
163 *y_stride = pitches[0];
166 *uv_stride = pitches[1];
170 *y_stride = pitches[0];
173 *uv_stride = pitches[1];
197 switch (dst_format) {
229 switch (dst_format) {
260 switch (dst_format) {
301 switch (dst_format) {
333 switch (dst_format) {
364 switch (dst_format) {
396 Uint32 src_format,
const void *
src,
int src_pitch,
397 Uint32 dst_format,
void *
dst,
int dst_pitch)
414 if (
yuv_rgb_sse(src_format, dst_format,
width,
height,
y, u,
v, y_stride, uv_stride, (
Uint8*)
dst, dst_pitch, yuv_type)) {
418 if (
yuv_rgb_std(src_format, dst_format,
width,
height,
y, u,
v, y_stride, uv_stride, (
Uint8*)
dst, dst_pitch, yuv_type)) {
460 const int src_pitch_x_2 = src_pitch * 2;
461 const int height_half =
height / 2;
462 const int height_remainder = (
height & 0x1);
463 const int width_half =
width / 2;
464 const int width_remainder = (
width & 0x1);
472 { 0.2990f, 0.5870f, 0.1140f },
473 { -0.1687f, -0.3313f, 0.5000f },
474 { 0.5000f, -0.4187f, -0.0813f },
479 { 0.2568f, 0.5041f, 0.0979f },
480 { -0.1482f, -0.2910f, 0.4392f },
481 { 0.4392f, -0.3678f, -0.0714f },
486 { 0.1826f, 0.6142f, 0.0620f },
487 {-0.1006f, -0.3386f, 0.4392f },
488 { 0.4392f, -0.3989f, -0.0403f },
/*
 * RGB -> Y/U/V component helpers.
 *
 * Each macro forms the weighted sum of (r, g, b) with one coefficient row of
 * `cvt` (cvt->y, cvt->u or cvt->v), adds 0.5f, converts the sum to int, then
 * adds an offset: cvt->y_offset for MAKE_Y, the constant 128 for MAKE_U and
 * MAKE_V. The result is cast to Uint8; no explicit clamping is performed.
 *
 * A variable named `cvt` exposing y[], u[], v[] and y_offset must be in scope
 * wherever these macros are expanded.
 */
493 #define MAKE_Y(r, g, b) (Uint8)((int)(cvt->y[0] * (r) + cvt->y[1] * (g) + cvt->y[2] * (b) + 0.5f) + cvt->y_offset)
494 #define MAKE_U(r, g, b) (Uint8)((int)(cvt->u[0] * (r) + cvt->u[1] * (g) + cvt->u[2] * (b) + 0.5f) + 128)
495 #define MAKE_V(r, g, b) (Uint8)((int)(cvt->v[0] * (r) + cvt->v[1] * (g) + cvt->v[2] * (b) + 0.5f) + 128)
/*
 * READ_2x2_PIXELS: declares p1..p4, the 32-bit pixels at indices 2*i and
 * 2*i + 1 of both curr_row and next_row, plus r, g and b holding the
 * per-channel average of those four pixels.
 *
 * Channels are masked out of bits 16-23 (r), 8-15 (g) and 0-7 (b); bits 24-31
 * are ignored. Each shift is the channel's bit position plus 2 (18, 10, 2),
 * which divides the four-sample sum by 4.
 *
 * Expects curr_row, next_row and i to be in scope at the expansion site.
 * NOTE(review): the last body line also ends with a line continuation, so
 * the physical line that follows is spliced into the macro - presumably a
 * blank line in the full file; confirm.
 */
497 #define READ_2x2_PIXELS \
498 const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i]; \
499 const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1]; \
500 const Uint32 p3 = ((const Uint32 *)next_row)[2 * i]; \
501 const Uint32 p4 = ((const Uint32 *)next_row)[2 * i + 1]; \
502 const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000) + (p3 & 0x00ff0000) + (p4 & 0x00ff0000)) >> 18; \
503 const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00) + (p3 & 0x0000ff00) + (p4 & 0x0000ff00)) >> 10; \
504 const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff) + (p3 & 0x000000ff) + (p4 & 0x000000ff)) >> 2; \
/*
 * READ_2x1_PIXELS: declares p1 and p2, the 32-bit pixels at index 2*i of
 * curr_row and of next_row, plus r, g and b holding the per-channel average
 * of the two. Each shift is the channel's bit position plus 1 (17, 9, 1),
 * which halves the two-sample sum.
 *
 * Expects curr_row, next_row and i to be in scope at the expansion site.
 */
506 #define READ_2x1_PIXELS \
507 const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i]; \
508 const Uint32 p2 = ((const Uint32 *)next_row)[2 * i]; \
509 const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17; \
510 const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9; \
511 const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1; \
/*
 * READ_1x2_PIXELS: declares p1 and p2, the 32-bit pixels at indices 2*i and
 * 2*i + 1 of curr_row, plus r, g and b holding the per-channel average of
 * the two. next_row is not read. Each shift is the channel's bit position
 * plus 1 (17, 9, 1), which halves the two-sample sum.
 *
 * Expects curr_row and i to be in scope at the expansion site.
 */
513 #define READ_1x2_PIXELS \
514 const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i]; \
515 const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1]; \
516 const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17; \
517 const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9; \
518 const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1; \
/*
 * READ_1x1_PIXEL: declares p, the single 32-bit pixel at index 2*i of
 * curr_row, and splits it into r (bits 16-23), g (bits 8-15) and b (bits 0-7)
 * without any averaging. Bits 24-31 are ignored.
 *
 * Expects curr_row and i to be in scope at the expansion site.
 */
520 #define READ_1x1_PIXEL \
521 const Uint32 p = ((const Uint32 *)curr_row)[2 * i]; \
522 const Uint32 r = (p & 0x00ff0000) >> 16; \
523 const Uint32 g = (p & 0x0000ff00) >> 8; \
524 const Uint32 b = (p & 0x000000ff); \
/*
 * READ_TWO_RGB_PIXELS: declares the two 32-bit pixels at indices 2*i and
 * 2*i + 1 of curr_row together with their individual channels and the
 * channel-wise mean of the pair:
 *   p,  r,  g,  b   - pixel at index 2*i and its components
 *   p1, r1, g1, b1  - pixel at index 2*i + 1 and its components
 *   R,  G,  B       - (first + second) / 2 per channel, integer division
 *
 * Unlike the READ_NxM macros, the per-pixel components stay available next to
 * the averaged ones. Expects curr_row and i to be in scope at the expansion
 * site.
 */
526 #define READ_TWO_RGB_PIXELS \
527 const Uint32 p = ((const Uint32 *)curr_row)[2 * i]; \
528 const Uint32 r = (p & 0x00ff0000) >> 16; \
529 const Uint32 g = (p & 0x0000ff00) >> 8; \
530 const Uint32 b = (p & 0x000000ff); \
531 const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i + 1]; \
532 const Uint32 r1 = (p1 & 0x00ff0000) >> 16; \
533 const Uint32 g1 = (p1 & 0x0000ff00) >> 8; \
534 const Uint32 b1 = (p1 & 0x000000ff); \
535 const Uint32 R = (r + r1)/2; \
536 const Uint32 G = (g + g1)/2; \
537 const Uint32 B = (b + b1)/2; \
/* READ_ONE_RGB_PIXEL: alias that expands to READ_1x1_PIXEL (one pixel, no averaging). */
539 #define READ_ONE_RGB_PIXEL READ_1x1_PIXEL
548 const Uint8 *curr_row, *next_row;
553 Uint8 *plane_interleaved_uv;
554 Uint32 y_stride, uv_stride, y_skip, uv_skip;
557 (
const Uint8 **)&plane_y, (
const Uint8 **)&plane_u, (
const Uint8 **)&plane_v,
558 &y_stride, &uv_stride);
559 plane_interleaved_uv = (plane_y +
height * y_stride);
560 y_skip = (y_stride -
width);
568 const Uint32 r = (p1 & 0x00ff0000) >> 16;
569 const Uint32 g = (p1 & 0x0000ff00) >> 8;
570 const Uint32 b = (p1 & 0x000000ff);
574 curr_row += src_pitch;
579 next_row += src_pitch;
584 uv_skip = (uv_stride - (
width + 1)/2);
585 for (
j = 0;
j < height_half;
j++) {
586 for (
i = 0;
i < width_half;
i++) {
591 if (width_remainder) {
598 curr_row += src_pitch_x_2;
599 next_row += src_pitch_x_2;
601 if (height_remainder) {
602 for (
i = 0;
i < width_half;
i++) {
607 if (width_remainder) {
618 uv_skip = (uv_stride - ((
width + 1)/2)*2);
619 for (
j = 0;
j < height_half;
j++) {
620 for (
i = 0;
i < width_half;
i++) {
622 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
623 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
625 if (width_remainder) {
627 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
628 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
630 plane_interleaved_uv += uv_skip;
631 curr_row += src_pitch_x_2;
632 next_row += src_pitch_x_2;
634 if (height_remainder) {
635 for (
i = 0;
i < width_half;
i++) {
637 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
638 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
640 if (width_remainder) {
642 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
643 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
649 uv_skip = (uv_stride - ((
width + 1)/2)*2);
650 for (
j = 0;
j < height_half;
j++) {
651 for (
i = 0;
i < width_half;
i++) {
653 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
654 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
656 if (width_remainder) {
658 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
659 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
661 plane_interleaved_uv += uv_skip;
662 curr_row += src_pitch_x_2;
663 next_row += src_pitch_x_2;
665 if (height_remainder) {
666 for (
i = 0;
i < width_half;
i++) {
668 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
669 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
671 if (width_remainder) {
673 *plane_interleaved_uv++ =
MAKE_V(
r,
g,
b);
674 *plane_interleaved_uv++ =
MAKE_U(
r,
g,
b);
687 const int row_size = (4 * ((
width + 1) / 2));
690 if (dst_pitch < row_size) {
691 return SDL_SetError(
"Destination pitch is too small, expected at least %d\n", row_size);
693 plane_skip = (dst_pitch - row_size);
699 for (
i = 0;
i < width_half;
i++) {
704 *plane++ =
MAKE_Y(r1, g1, b1);
707 if (width_remainder) {
716 curr_row += src_pitch;
722 for (
i = 0;
i < width_half;
i++) {
728 *plane++ =
MAKE_Y(r1, g1, b1);
730 if (width_remainder) {
739 curr_row += src_pitch;
745 for (
i = 0;
i < width_half;
i++) {
750 *plane++ =
MAKE_Y(r1, g1, b1);
753 if (width_remainder) {
762 curr_row += src_pitch;
774 #undef READ_2x2_PIXELS
775 #undef READ_2x1_PIXELS
776 #undef READ_1x2_PIXELS
777 #undef READ_1x1_PIXEL
778 #undef READ_TWO_RGB_PIXELS
779 #undef READ_ONE_RGB_PIXEL
785 Uint32 src_format,
const void *
src,
int src_pitch,
786 Uint32 dst_format,
void *
dst,
int dst_pitch)
843 const void *
src,
int src_pitch,
void *
dst,
int dst_pitch)
859 src_pitch = (src_pitch + 1) / 2;
860 dst_pitch = (dst_pitch + 1) / 2;
870 src_pitch = ((src_pitch + 1) / 2)*2;
871 dst_pitch = ((dst_pitch + 1) / 2)*2;
899 const int UVwidth = (
width + 1)/2;
900 const int UVheight = (
height + 1)/2;
907 int UVpitch = (dst_pitch + 1)/2;
917 for (
y = 0;
y < UVheight; ++
y) {
928 int srcUVPitch = ((src_pitch + 1)/2);
929 int dstUVPitch = ((dst_pitch + 1)/2);
933 dstUV = (
Uint8 *)
dst + UVheight * dstUVPitch;
934 for (
y = 0;
y < UVheight; ++
y) {
942 for (
y = 0;
y < UVheight; ++
y) {
955 const int UVwidth = (
width + 1)/2;
956 const int UVheight = (
height + 1)/2;
957 const int srcUVPitch = ((src_pitch + 1)/2);
958 const int srcUVPitchLeft = srcUVPitch - UVwidth;
959 const int dstUVPitch = ((dst_pitch + 1)/2)*2;
960 const int dstUVPitchLeft = dstUVPitch - UVwidth*2;
961 const Uint8 *src1, *src2;
984 src1 = src2 + UVheight * srcUVPitch;
987 src2 = src1 + UVheight * srcUVPitch;
997 __m128i
u = _mm_loadu_si128((__m128i *)src1);
998 __m128i
v = _mm_loadu_si128((__m128i *)src2);
999 __m128i uv1 = _mm_unpacklo_epi8(
u,
v);
1000 __m128i uv2 = _mm_unpackhi_epi8(
u,
v);
1001 _mm_storeu_si128((__m128i*)dstUV, uv1);
1002 _mm_storeu_si128((__m128i*)(dstUV + 16), uv2);
1014 src1 += srcUVPitchLeft;
1015 src2 += srcUVPitchLeft;
1016 dstUV += dstUVPitchLeft;
1029 const int UVwidth = (
width + 1)/2;
1030 const int UVheight = (
height + 1)/2;
1031 const int srcUVPitch = ((src_pitch + 1)/2)*2;
1032 const int srcUVPitchLeft = srcUVPitch - UVwidth*2;
1033 const int dstUVPitch = ((dst_pitch + 1)/2);
1034 const int dstUVPitchLeft = dstUVPitch - UVwidth;
1058 dst1 = dst2 + UVheight * dstUVPitch;
1061 dst2 = dst1 + UVheight * dstUVPitch;
1070 __m128i
mask = _mm_set1_epi16(0x00FF);
1072 __m128i uv1 = _mm_loadu_si128((__m128i*)srcUV);
1073 __m128i uv2 = _mm_loadu_si128((__m128i*)(srcUV+16));
1074 __m128i
u1 = _mm_and_si128(uv1,
mask);
1075 __m128i
u2 = _mm_and_si128(uv2,
mask);
1076 __m128i
u = _mm_packus_epi16(
u1,
u2);
1077 __m128i
v1 = _mm_srli_epi16(uv1, 8);
1078 __m128i
v2 = _mm_srli_epi16(uv2, 8);
1079 __m128i
v = _mm_packus_epi16(
v1,
v2);
1080 _mm_storeu_si128((__m128i*)dst1,
u);
1081 _mm_storeu_si128((__m128i*)dst2,
v);
1093 srcUV += srcUVPitchLeft;
1094 dst1 += dstUVPitchLeft;
1095 dst2 += dstUVPitchLeft;
1108 const int UVwidth = (
width + 1)/2;
1109 const int UVheight = (
height + 1)/2;
1110 const int srcUVPitch = ((src_pitch + 1)/2)*2;
1111 const int srcUVPitchLeft = (srcUVPitch - UVwidth*2)/
sizeof(
Uint16);
1112 const int dstUVPitch = ((dst_pitch + 1)/2)*2;
1113 const int dstUVPitchLeft = (dstUVPitch - UVwidth*2)/
sizeof(
Uint16);
1132 __m128i uv = _mm_loadu_si128((__m128i*)srcUV);
1133 __m128i
v = _mm_slli_epi16(uv, 8);
1134 __m128i
u = _mm_srli_epi16(uv, 8);
1135 __m128i vu = _mm_or_si128(
v,
u);
1136 _mm_storeu_si128((__m128i*)dstUV, vu);
1146 srcUV += srcUVPitchLeft;
1147 dstUV += dstUVPitchLeft;
1154 Uint32 src_format,
const void *
src,
int src_pitch,
1155 Uint32 dst_format,
void *
dst,
int dst_pitch)
1169 switch (src_format) {
1171 switch (dst_format) {
1183 switch (dst_format) {
1195 switch (dst_format) {
1207 switch (dst_format) {
1224 #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
1226 __m128i yuv = _mm_loadu_si128((__m128i*)srcYUV); \
1227 __m128i lo = _mm_unpacklo_epi8(yuv, _mm_setzero_si128()); \
1228 __m128i hi = _mm_unpackhi_epi8(yuv, _mm_setzero_si128()); \
1229 lo = _mm_shufflelo_epi16(lo, shuffle); \
1230 lo = _mm_shufflehi_epi16(lo, shuffle); \
1231 hi = _mm_shufflelo_epi16(hi, shuffle); \
1232 hi = _mm_shufflehi_epi16(hi, shuffle); \
1233 yuv = _mm_packus_epi16(lo, hi); \
1234 _mm_storeu_si128((__m128i*)dstYUV, yuv); \
1244 const int YUVwidth = (
width + 1)/2;
1245 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1246 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1276 srcYUV += srcYUVPitchLeft;
1277 dstYUV += dstYUVPitchLeft;
1286 const int YUVwidth = (
width + 1)/2;
1287 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1288 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1318 srcYUV += srcYUVPitchLeft;
1319 dstYUV += dstYUVPitchLeft;
1328 const int YUVwidth = (
width + 1)/2;
1329 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1330 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1360 srcYUV += srcYUVPitchLeft;
1361 dstYUV += dstYUVPitchLeft;
1370 const int YUVwidth = (
width + 1)/2;
1371 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1372 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1402 srcYUV += srcYUVPitchLeft;
1403 dstYUV += dstYUVPitchLeft;
1412 const int YUVwidth = (
width + 1)/2;
1413 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1414 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1444 srcYUV += srcYUVPitchLeft;
1445 dstYUV += dstYUVPitchLeft;
1454 const int YUVwidth = (
width + 1)/2;
1455 const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
1456 const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
1486 srcYUV += srcYUVPitchLeft;
1487 dstYUV += dstYUVPitchLeft;
1494 Uint32 src_format,
const void *
src,
int src_pitch,
1495 Uint32 dst_format,
void *
dst,
int dst_pitch)
1497 switch (src_format) {
1499 switch (dst_format) {
1509 switch (dst_format) {
1519 switch (dst_format) {
1536 Uint32 src_format,
const void *
src,
int src_pitch,
1537 Uint32 dst_format,
void *
dst,
int dst_pitch)
1541 Uint32 srcY_pitch, srcUV_pitch;
1542 Uint32 srcY_pitch_left, srcUV_pitch_left, srcUV_pixel_stride;
1543 Uint8 *
dstY1, *dstY2, *dstU1, *dstU2, *dstV1, *dstV2;
1544 Uint32 dstY_pitch, dstUV_pitch;
1548 return SDL_SetError(
"Can't change YUV plane types in-place");
1552 &
srcY1, &srcU, &srcV, &srcY_pitch, &srcUV_pitch) < 0) {
1555 srcY2 =
srcY1 + srcY_pitch;
1556 srcY_pitch_left = (srcY_pitch -
width);
1559 srcUV_pixel_stride = 2;
1560 srcUV_pitch_left = (srcUV_pitch - 2*((
width + 1)/2));
1562 srcUV_pixel_stride = 1;
1563 srcUV_pitch_left = (srcUV_pitch - ((
width + 1)/2));
1568 &dstY_pitch, &dstUV_pitch) < 0) {
1571 dstY2 =
dstY1 + dstY_pitch;
1572 dstU2 = dstU1 + dstUV_pitch;
1573 dstV2 = dstV1 + dstUV_pitch;
1574 dst_pitch_left = (dstY_pitch - 4*((
width + 1)/2));
1578 for (
x = 0;
x < (
width - 1);
x += 2) {
1595 srcU += srcUV_pixel_stride;
1596 srcV += srcUV_pixel_stride;
1621 srcU += srcUV_pixel_stride;
1622 srcV += srcUV_pixel_stride;
1629 srcY1 += srcY_pitch_left + srcY_pitch;
1630 srcY2 += srcY_pitch_left + srcY_pitch;
1631 srcU += srcUV_pitch_left;
1632 srcV += srcUV_pitch_left;
1633 dstY1 += dst_pitch_left + dstY_pitch;
1634 dstY2 += dst_pitch_left + dstY_pitch;
1635 dstU1 += dst_pitch_left + dstUV_pitch;
1636 dstU2 += dst_pitch_left + dstUV_pitch;
1637 dstV1 += dst_pitch_left + dstUV_pitch;
1638 dstV2 += dst_pitch_left + dstUV_pitch;
1643 for (
x = 0;
x < (
width - 1);
x += 2) {
1652 srcU += srcUV_pixel_stride;
1653 srcV += srcUV_pixel_stride;
1668 srcU += srcUV_pixel_stride;
1669 srcV += srcUV_pixel_stride;
1679 Uint32 src_format,
const void *
src,
int src_pitch,
1680 Uint32 dst_format,
void *
dst,
int dst_pitch)
1683 const Uint8 *
srcY1, *srcY2, *srcU1, *srcU2, *srcV1, *srcV2;
1684 Uint32 srcY_pitch, srcUV_pitch;
1687 Uint32 dstY_pitch, dstUV_pitch;
1688 Uint32 dstY_pitch_left, dstUV_pitch_left, dstUV_pixel_stride;
1691 return SDL_SetError(
"Can't change YUV plane types in-place");
1695 &
srcY1, &srcU1, &srcV1, &srcY_pitch, &srcUV_pitch) < 0) {
1698 srcY2 =
srcY1 + srcY_pitch;
1699 srcU2 = srcU1 + srcUV_pitch;
1700 srcV2 = srcV1 + srcUV_pitch;
1701 src_pitch_left = (srcY_pitch - 4*((
width + 1)/2));
1705 &dstY_pitch, &dstUV_pitch) < 0) {
1708 dstY2 =
dstY1 + dstY_pitch;
1709 dstY_pitch_left = (dstY_pitch -
width);
1712 dstUV_pixel_stride = 2;
1713 dstUV_pitch_left = (dstUV_pitch - 2*((
width + 1)/2));
1715 dstUV_pixel_stride = 1;
1716 dstUV_pitch_left = (dstUV_pitch - ((
width + 1)/2));
1721 for (
x = 0;
x < (
width - 1);
x += 2) {
1741 dstU += dstUV_pixel_stride;
1742 dstV += dstUV_pixel_stride;
1766 dstU += dstUV_pixel_stride;
1767 dstV += dstUV_pixel_stride;
1770 srcY1 += src_pitch_left + srcY_pitch;
1771 srcY2 += src_pitch_left + srcY_pitch;
1772 srcU1 += src_pitch_left + srcUV_pitch;
1773 srcU2 += src_pitch_left + srcUV_pitch;
1774 srcV1 += src_pitch_left + srcUV_pitch;
1775 srcV2 += src_pitch_left + srcUV_pitch;
1776 dstY1 += dstY_pitch_left + dstY_pitch;
1777 dstY2 += dstY_pitch_left + dstY_pitch;
1778 dstU += dstUV_pitch_left;
1779 dstV += dstUV_pitch_left;
1784 for (
x = 0;
x < (
width - 1);
x += 2) {
1795 dstU += dstUV_pixel_stride;
1796 dstV += dstUV_pixel_stride;
1811 Uint32 src_format,
const void *
src,
int src_pitch,
1812 Uint32 dst_format,
void *
dst,
int dst_pitch)
1814 if (src_format == dst_format) {