3 #include "../../SDL_internal.h" 11 #define PRECISION_FACTOR (1<<PRECISION) 35 #define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5) 44 { 0,
V(1.0),
V(1.402), -
V(0.3441), -
V(0.7141),
V(1.772)},
46 { 16,
V(1.1644),
V(1.596), -
V(0.3918), -
V(0.813),
V(2.0172)},
48 { 16,
V(1.1644),
V(1.7927), -
V(0.2132), -
V(0.5329),
V(2.1124)}
53 { 0, {{
V(0.299),
V(0.587),
V(0.114)}, {-
V(0.1687), -
V(0.3313),
V(0.5)}, {
V(0.5), -
V(0.4187), -
V(0.0813)}}},
55 { 16, {{
V(0.2568),
V(0.5041),
V(0.0979)}, {-
V(0.1482), -
V(0.291),
V(0.4392)}, {
V(0.4392), -
V(0.3678), -
V(0.0714)}}},
57 { 16, {{
V(0.1826),
V(0.6142),
V(0.062)}, {-
V(0.1006), -
V(0.3386),
V(0.4392)}, {
V(0.4392), -
V(0.3989), -
V(0.0403)}}}
61 #define YUV_FORMAT_420 1 62 #define YUV_FORMAT_422 2 63 #define YUV_FORMAT_NV12 3 66 #define RGB_FORMAT_RGB565 1 67 #define RGB_FORMAT_RGB24 2 68 #define RGB_FORMAT_RGBA 3 69 #define RGB_FORMAT_BGRA 4 70 #define RGB_FORMAT_ARGB 5 71 #define RGB_FORMAT_ABGR 6 78 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
81 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
82 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
83 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
84 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
85 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
86 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
87 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
88 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
96 #define STD_FUNCTION_NAME yuv420_rgb565_std 97 #define YUV_FORMAT YUV_FORMAT_420 98 #define RGB_FORMAT RGB_FORMAT_RGB565 101 #define STD_FUNCTION_NAME yuv420_rgb24_std 102 #define YUV_FORMAT YUV_FORMAT_420 103 #define RGB_FORMAT RGB_FORMAT_RGB24 106 #define STD_FUNCTION_NAME yuv420_rgba_std 107 #define YUV_FORMAT YUV_FORMAT_420 108 #define RGB_FORMAT RGB_FORMAT_RGBA 111 #define STD_FUNCTION_NAME yuv420_bgra_std 112 #define YUV_FORMAT YUV_FORMAT_420 113 #define RGB_FORMAT RGB_FORMAT_BGRA 116 #define STD_FUNCTION_NAME yuv420_argb_std 117 #define YUV_FORMAT YUV_FORMAT_420 118 #define RGB_FORMAT RGB_FORMAT_ARGB 121 #define STD_FUNCTION_NAME yuv420_abgr_std 122 #define YUV_FORMAT YUV_FORMAT_420 123 #define RGB_FORMAT RGB_FORMAT_ABGR 126 #define STD_FUNCTION_NAME yuv422_rgb565_std 127 #define YUV_FORMAT YUV_FORMAT_422 128 #define RGB_FORMAT RGB_FORMAT_RGB565 131 #define STD_FUNCTION_NAME yuv422_rgb24_std 132 #define YUV_FORMAT YUV_FORMAT_422 133 #define RGB_FORMAT RGB_FORMAT_RGB24 136 #define STD_FUNCTION_NAME yuv422_rgba_std 137 #define YUV_FORMAT YUV_FORMAT_422 138 #define RGB_FORMAT RGB_FORMAT_RGBA 141 #define STD_FUNCTION_NAME yuv422_bgra_std 142 #define YUV_FORMAT YUV_FORMAT_422 143 #define RGB_FORMAT RGB_FORMAT_BGRA 146 #define STD_FUNCTION_NAME yuv422_argb_std 147 #define YUV_FORMAT YUV_FORMAT_422 148 #define RGB_FORMAT RGB_FORMAT_ARGB 151 #define STD_FUNCTION_NAME yuv422_abgr_std 152 #define YUV_FORMAT YUV_FORMAT_422 153 #define RGB_FORMAT RGB_FORMAT_ABGR 156 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 157 #define YUV_FORMAT YUV_FORMAT_NV12 158 #define RGB_FORMAT RGB_FORMAT_RGB565 161 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 162 #define YUV_FORMAT YUV_FORMAT_NV12 163 #define RGB_FORMAT RGB_FORMAT_RGB24 166 #define STD_FUNCTION_NAME yuvnv12_rgba_std 167 #define YUV_FORMAT YUV_FORMAT_NV12 168 #define RGB_FORMAT RGB_FORMAT_RGBA 171 #define STD_FUNCTION_NAME yuvnv12_bgra_std 172 #define YUV_FORMAT YUV_FORMAT_NV12 173 #define RGB_FORMAT RGB_FORMAT_BGRA 176 #define STD_FUNCTION_NAME yuvnv12_argb_std 177 #define YUV_FORMAT YUV_FORMAT_NV12 178 #define RGB_FORMAT RGB_FORMAT_ARGB 181 #define STD_FUNCTION_NAME yuvnv12_abgr_std 182 #define YUV_FORMAT YUV_FORMAT_NV12 183 #define RGB_FORMAT RGB_FORMAT_ABGR 195 for(y=0; y<(height-1); y+=2)
197 const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
198 *rgb_ptr2=RGB+(y+1)*RGB_stride;
201 *y_ptr2=Y+(y+1)*Y_stride,
202 *u_ptr=U+(y/2)*UV_stride,
203 *v_ptr=V+(y/2)*UV_stride;
205 for(x=0; x<(width-1); x+=2)
210 y_tmp = param->
matrix[0][0]*rgb_ptr1[0] + param->
matrix[0][1]*rgb_ptr1[1] + param->
matrix[0][2]*rgb_ptr1[2];
211 u_tmp = param->
matrix[1][0]*rgb_ptr1[0] + param->
matrix[1][1]*rgb_ptr1[1] + param->
matrix[1][2]*rgb_ptr1[2];
212 v_tmp = param->
matrix[2][0]*rgb_ptr1[0] + param->
matrix[2][1]*rgb_ptr1[1] + param->
matrix[2][2]*rgb_ptr1[2];
215 y_tmp = param->
matrix[0][0]*rgb_ptr1[3] + param->
matrix[0][1]*rgb_ptr1[4] + param->
matrix[0][2]*rgb_ptr1[5];
216 u_tmp += param->
matrix[1][0]*rgb_ptr1[3] + param->
matrix[1][1]*rgb_ptr1[4] + param->
matrix[1][2]*rgb_ptr1[5];
217 v_tmp += param->
matrix[2][0]*rgb_ptr1[3] + param->
matrix[2][1]*rgb_ptr1[4] + param->
matrix[2][2]*rgb_ptr1[5];
220 y_tmp = param->
matrix[0][0]*rgb_ptr2[0] + param->
matrix[0][1]*rgb_ptr2[1] + param->
matrix[0][2]*rgb_ptr2[2];
221 u_tmp += param->
matrix[1][0]*rgb_ptr2[0] + param->
matrix[1][1]*rgb_ptr2[1] + param->
matrix[1][2]*rgb_ptr2[2];
222 v_tmp += param->
matrix[2][0]*rgb_ptr2[0] + param->
matrix[2][1]*rgb_ptr2[1] + param->
matrix[2][2]*rgb_ptr2[2];
225 y_tmp = param->
matrix[0][0]*rgb_ptr2[3] + param->
matrix[0][1]*rgb_ptr2[4] + param->
matrix[0][2]*rgb_ptr2[5];
226 u_tmp += param->
matrix[1][0]*rgb_ptr2[3] + param->
matrix[1][1]*rgb_ptr2[4] + param->
matrix[1][2]*rgb_ptr2[5];
227 v_tmp += param->
matrix[2][0]*rgb_ptr2[3] + param->
matrix[2][1]*rgb_ptr2[4] + param->
matrix[2][2]*rgb_ptr2[5];
245 #define SSE_FUNCTION_NAME yuv420_rgb565_sse 246 #define STD_FUNCTION_NAME yuv420_rgb565_std 247 #define YUV_FORMAT YUV_FORMAT_420 248 #define RGB_FORMAT RGB_FORMAT_RGB565 252 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu 253 #define STD_FUNCTION_NAME yuv420_rgb565_std 254 #define YUV_FORMAT YUV_FORMAT_420 255 #define RGB_FORMAT RGB_FORMAT_RGB565 258 #define SSE_FUNCTION_NAME yuv420_rgb24_sse 259 #define STD_FUNCTION_NAME yuv420_rgb24_std 260 #define YUV_FORMAT YUV_FORMAT_420 261 #define RGB_FORMAT RGB_FORMAT_RGB24 265 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu 266 #define STD_FUNCTION_NAME yuv420_rgb24_std 267 #define YUV_FORMAT YUV_FORMAT_420 268 #define RGB_FORMAT RGB_FORMAT_RGB24 271 #define SSE_FUNCTION_NAME yuv420_rgba_sse 272 #define STD_FUNCTION_NAME yuv420_rgba_std 273 #define YUV_FORMAT YUV_FORMAT_420 274 #define RGB_FORMAT RGB_FORMAT_RGBA 278 #define SSE_FUNCTION_NAME yuv420_rgba_sseu 279 #define STD_FUNCTION_NAME yuv420_rgba_std 280 #define YUV_FORMAT YUV_FORMAT_420 281 #define RGB_FORMAT RGB_FORMAT_RGBA 284 #define SSE_FUNCTION_NAME yuv420_bgra_sse 285 #define STD_FUNCTION_NAME yuv420_bgra_std 286 #define YUV_FORMAT YUV_FORMAT_420 287 #define RGB_FORMAT RGB_FORMAT_BGRA 291 #define SSE_FUNCTION_NAME yuv420_bgra_sseu 292 #define STD_FUNCTION_NAME yuv420_bgra_std 293 #define YUV_FORMAT YUV_FORMAT_420 294 #define RGB_FORMAT RGB_FORMAT_BGRA 297 #define SSE_FUNCTION_NAME yuv420_argb_sse 298 #define STD_FUNCTION_NAME yuv420_argb_std 299 #define YUV_FORMAT YUV_FORMAT_420 300 #define RGB_FORMAT RGB_FORMAT_ARGB 304 #define SSE_FUNCTION_NAME yuv420_argb_sseu 305 #define STD_FUNCTION_NAME yuv420_argb_std 306 #define YUV_FORMAT YUV_FORMAT_420 307 #define RGB_FORMAT RGB_FORMAT_ARGB 310 #define SSE_FUNCTION_NAME yuv420_abgr_sse 311 #define STD_FUNCTION_NAME yuv420_abgr_std 312 #define YUV_FORMAT YUV_FORMAT_420 313 #define RGB_FORMAT RGB_FORMAT_ABGR 317 #define SSE_FUNCTION_NAME yuv420_abgr_sseu 318 #define STD_FUNCTION_NAME yuv420_abgr_std 319 #define YUV_FORMAT YUV_FORMAT_420 320 #define RGB_FORMAT RGB_FORMAT_ABGR 323 #define SSE_FUNCTION_NAME yuv422_rgb565_sse 324 #define STD_FUNCTION_NAME yuv422_rgb565_std 325 #define YUV_FORMAT YUV_FORMAT_422 326 #define RGB_FORMAT RGB_FORMAT_RGB565 330 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu 331 #define STD_FUNCTION_NAME yuv422_rgb565_std 332 #define YUV_FORMAT YUV_FORMAT_422 333 #define RGB_FORMAT RGB_FORMAT_RGB565 336 #define SSE_FUNCTION_NAME yuv422_rgb24_sse 337 #define STD_FUNCTION_NAME yuv422_rgb24_std 338 #define YUV_FORMAT YUV_FORMAT_422 339 #define RGB_FORMAT RGB_FORMAT_RGB24 343 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu 344 #define STD_FUNCTION_NAME yuv422_rgb24_std 345 #define YUV_FORMAT YUV_FORMAT_422 346 #define RGB_FORMAT RGB_FORMAT_RGB24 349 #define SSE_FUNCTION_NAME yuv422_rgba_sse 350 #define STD_FUNCTION_NAME yuv422_rgba_std 351 #define YUV_FORMAT YUV_FORMAT_422 352 #define RGB_FORMAT RGB_FORMAT_RGBA 356 #define SSE_FUNCTION_NAME yuv422_rgba_sseu 357 #define STD_FUNCTION_NAME yuv422_rgba_std 358 #define YUV_FORMAT YUV_FORMAT_422 359 #define RGB_FORMAT RGB_FORMAT_RGBA 362 #define SSE_FUNCTION_NAME yuv422_bgra_sse 363 #define STD_FUNCTION_NAME yuv422_bgra_std 364 #define YUV_FORMAT YUV_FORMAT_422 365 #define RGB_FORMAT RGB_FORMAT_BGRA 369 #define SSE_FUNCTION_NAME yuv422_bgra_sseu 370 #define STD_FUNCTION_NAME yuv422_bgra_std 371 #define YUV_FORMAT YUV_FORMAT_422 372 #define RGB_FORMAT RGB_FORMAT_BGRA 375 #define SSE_FUNCTION_NAME yuv422_argb_sse 376 #define STD_FUNCTION_NAME yuv422_argb_std 377 #define YUV_FORMAT YUV_FORMAT_422 378 #define RGB_FORMAT RGB_FORMAT_ARGB 382 #define SSE_FUNCTION_NAME yuv422_argb_sseu 383 #define STD_FUNCTION_NAME yuv422_argb_std 384 #define YUV_FORMAT YUV_FORMAT_422 385 #define RGB_FORMAT RGB_FORMAT_ARGB 388 #define SSE_FUNCTION_NAME yuv422_abgr_sse 389 #define STD_FUNCTION_NAME yuv422_abgr_std 390 #define YUV_FORMAT YUV_FORMAT_422 391 #define RGB_FORMAT RGB_FORMAT_ABGR 395 #define SSE_FUNCTION_NAME yuv422_abgr_sseu 396 #define STD_FUNCTION_NAME yuv422_abgr_std 397 #define YUV_FORMAT YUV_FORMAT_422 398 #define RGB_FORMAT RGB_FORMAT_ABGR 401 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse 402 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 403 #define YUV_FORMAT YUV_FORMAT_NV12 404 #define RGB_FORMAT RGB_FORMAT_RGB565 408 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu 409 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 410 #define YUV_FORMAT YUV_FORMAT_NV12 411 #define RGB_FORMAT RGB_FORMAT_RGB565 414 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse 415 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 416 #define YUV_FORMAT YUV_FORMAT_NV12 417 #define RGB_FORMAT RGB_FORMAT_RGB24 421 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu 422 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 423 #define YUV_FORMAT YUV_FORMAT_NV12 424 #define RGB_FORMAT RGB_FORMAT_RGB24 427 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse 428 #define STD_FUNCTION_NAME yuvnv12_rgba_std 429 #define YUV_FORMAT YUV_FORMAT_NV12 430 #define RGB_FORMAT RGB_FORMAT_RGBA 434 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu 435 #define STD_FUNCTION_NAME yuvnv12_rgba_std 436 #define YUV_FORMAT YUV_FORMAT_NV12 437 #define RGB_FORMAT RGB_FORMAT_RGBA 440 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse 441 #define STD_FUNCTION_NAME yuvnv12_bgra_std 442 #define YUV_FORMAT YUV_FORMAT_NV12 443 #define RGB_FORMAT RGB_FORMAT_BGRA 447 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu 448 #define STD_FUNCTION_NAME yuvnv12_bgra_std 449 #define YUV_FORMAT YUV_FORMAT_NV12 450 #define RGB_FORMAT RGB_FORMAT_BGRA 453 #define SSE_FUNCTION_NAME yuvnv12_argb_sse 454 #define STD_FUNCTION_NAME yuvnv12_argb_std 455 #define YUV_FORMAT YUV_FORMAT_NV12 456 #define RGB_FORMAT RGB_FORMAT_ARGB 460 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu 461 #define STD_FUNCTION_NAME yuvnv12_argb_std 462 #define YUV_FORMAT YUV_FORMAT_NV12 463 #define RGB_FORMAT RGB_FORMAT_ARGB 466 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse 467 #define STD_FUNCTION_NAME yuvnv12_abgr_std 468 #define YUV_FORMAT YUV_FORMAT_NV12 469 #define RGB_FORMAT RGB_FORMAT_ABGR 473 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu 474 #define STD_FUNCTION_NAME yuvnv12_abgr_std 475 #define YUV_FORMAT YUV_FORMAT_NV12 476 #define RGB_FORMAT RGB_FORMAT_ABGR 480 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 481 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ 482 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ 483 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ 484 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ 485 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ 486 B2 = _mm_unpackhi_epi8(RGB3, RGB6); 488 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 489 RGB1 = _mm_unpacklo_epi8(R1, G2); \ 490 RGB2 = _mm_unpackhi_epi8(R1, G2); \ 491 RGB3 = _mm_unpacklo_epi8(R2, B1); \ 492 RGB4 = _mm_unpackhi_epi8(R2, B1); \ 493 RGB5 = _mm_unpacklo_epi8(G1, B2); \ 494 RGB6 = _mm_unpackhi_epi8(G1, B2); \ 496 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 497 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 498 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 503 #define RGB2YUV_16(R, G, B, Y, U, V) \ 504 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ 505 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ 506 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ 507 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ 508 Y = _mm_srai_epi16(Y, PRECISION); \ 509 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ 510 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ 511 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ 512 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ 513 U = _mm_srai_epi16(U, PRECISION); \ 514 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ 515 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ 516 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ 517 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \ 518 V = _mm_srai_epi16(V, PRECISION); 521 __m128i r1, r2, b1, b2, g1, g2; \ 522 __m128i r_16, g_16, b_16; \ 523 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \ 524 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ 525 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ 526 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ 527 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ 528 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ 529 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ 531 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 533 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 534 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 535 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 536 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 537 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 538 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 539 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 540 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 541 y = _mm_packus_epi16(y1_16, y2_16); \ 542 u1 = _mm_packus_epi16(u1_16, u2_16); \ 543 v1 = _mm_packus_epi16(v1_16, v2_16); \ 545 SAVE_SI128((__m128i*)(y_ptr1), y); \ 547 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 548 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 549 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 550 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 551 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 552 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 553 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 554 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 555 y = _mm_packus_epi16(y1_16, y2_16); \ 556 u2 = _mm_packus_epi16(u1_16, u2_16); \ 557 v2 = _mm_packus_epi16(v1_16, v2_16); \ 559 SAVE_SI128((__m128i*)(y_ptr2), y); \ 561 u1_tmp = _mm_avg_epu8(u1, u2); \ 562 v1_tmp = _mm_avg_epu8(v1, v2); \ 564 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \ 565 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \ 566 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \ 567 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ 568 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \ 569 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ 571 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 573 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 574 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 575 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 576 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 577 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 578 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 579 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 580 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 581 y = _mm_packus_epi16(y1_16, y2_16); \ 582 u1 = _mm_packus_epi16(u1_16, u2_16); \ 583 v1 = _mm_packus_epi16(v1_16, v2_16); \ 585 SAVE_SI128((__m128i*)(y_ptr1+16), y); \ 587 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 588 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 589 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 590 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 591 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 592 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 593 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 594 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 595 y = _mm_packus_epi16(y1_16, y2_16); \ 596 u2 = _mm_packus_epi16(u1_16, u2_16); \ 597 v2 = _mm_packus_epi16(v1_16, v2_16); \ 599 SAVE_SI128((__m128i*)(y_ptr2+16), y); \ 601 u2_tmp = _mm_avg_epu8(u1, u2); \ 602 v2_tmp = _mm_avg_epu8(v1, v2); \ 604 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \ 605 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \ 606 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \ 607 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \ 608 u1 = _mm_avg_epu8(u1, u2); \ 609 v1 = _mm_avg_epu8(v1, v2); \ 610 SAVE_SI128((__m128i*)(u_ptr), u1); \ 611 SAVE_SI128((__m128i*)(v_ptr), v1); 618 #define LOAD_SI128 _mm_load_si128 619 #define SAVE_SI128 _mm_stream_si128 623 for(ypos=0; ypos<(
height-1); ypos+=2)
626 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
628 uint8_t *y_ptr1=Y+ypos*Y_stride,
629 *y_ptr2=Y+(ypos+1)*Y_stride,
630 *u_ptr=U+(ypos/2)*UV_stride,
631 *v_ptr=
V+(ypos/2)*UV_stride;
633 for(xpos=0; xpos<(
width-31); xpos+=32)
654 #define LOAD_SI128 _mm_loadu_si128 655 #define SAVE_SI128 _mm_storeu_si128 659 for(ypos=0; ypos<(
height-1); ypos+=2)
662 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
664 uint8_t *y_ptr1=Y+ypos*Y_stride,
665 *y_ptr2=Y+(ypos+1)*Y_stride,
666 *u_ptr=U+(ypos/2)*UV_stride,
667 *v_ptr=
V+(ypos/2)*UV_stride;
669 for(xpos=0; xpos<(
width-31); xpos+=32)
GLint GLint GLint GLint GLint x
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const RGB2YUVParam RGB2YUV[3]
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
GLint GLint GLsizei width
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
GLint GLint GLint GLint GLint GLint y
static uint8_t clampU8(int32_t v)
GLint GLint GLsizei GLsizei height