SDL  2.0
yuv_rgb_sse_func.h File Reference

Go to the source code of this file.

Macros

#define LOAD_SI128   _mm_loadu_si128
 
#define SAVE_SI128   _mm_storeu_si128
 
#define UV2RGB_16(U, V, R1, G1, B1, R2, G2, B2)
 
#define ADD_Y2RGB_16(Y1, Y2, R1, G1, B1, R2, G2, B2)
 
#define PACK_RGB565_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4)
 
#define PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGBA_32(R1, R2, G1, G2, B1, B2, A1, A2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, RGB7, RGB8)
 
#define PACK_PIXEL
 
#define SAVE_LINE1
 
#define SAVE_LINE2
 
#define READ_Y(y_ptr)   y = LOAD_SI128((const __m128i*)(y_ptr)); \
 
#define READ_UV
 
#define YUV2RGB_32
 

Functions

void SSE_FUNCTION_NAME (uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)
 

Macro Definition Documentation

◆ ADD_Y2RGB_16

#define ADD_Y2RGB_16 (   Y1,
  Y2,
  R1,
  G1,
  B1,
  R2,
  G2,
  B2 
)
Value:
Y1 = _mm_mullo_epi16(_mm_sub_epi16(Y1, _mm_set1_epi16(param->y_shift)), _mm_set1_epi16(param->y_factor)); \
Y2 = _mm_mullo_epi16(_mm_sub_epi16(Y2, _mm_set1_epi16(param->y_shift)), _mm_set1_epi16(param->y_factor)); \
\
R1 = _mm_srai_epi16(_mm_add_epi16(R1, Y1), PRECISION); \
G1 = _mm_srai_epi16(_mm_add_epi16(G1, Y1), PRECISION); \
B1 = _mm_srai_epi16(_mm_add_epi16(B1, Y1), PRECISION); \
R2 = _mm_srai_epi16(_mm_add_epi16(R2, Y2), PRECISION); \
G2 = _mm_srai_epi16(_mm_add_epi16(G2, Y2), PRECISION); \
B2 = _mm_srai_epi16(_mm_add_epi16(B2, Y2), PRECISION); \
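
Cross-references for the value above (Doxygen link tooltips, not part of the macro body):
#define PRECISION
Definition: yuv_rgb.c:10
GLfloat param (linked by name only; the macro dereferences param->y_shift and param->y_factor, so the param it expects is a pointer to a structure, not a GLfloat)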

Definition at line 40 of file yuv_rgb_sse_func.h.

◆ LOAD_SI128

#define LOAD_SI128   _mm_loadu_si128

Definition at line 23 of file yuv_rgb_sse_func.h.

◆ PACK_PIXEL

#define PACK_PIXEL
Value:
__m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \
PACK_RGB565_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, rgb_1, rgb_2, rgb_3, rgb_4) \
PACK_RGB565_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, rgb_5, rgb_6, rgb_7, rgb_8) \
#define PACK_RGB565_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4)

Definition at line 126 of file yuv_rgb_sse_func.h.

Referenced by SSE_FUNCTION_NAME().

◆ PACK_RGB24_32

#define PACK_RGB24_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
#define PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)

Definition at line 94 of file yuv_rgb_sse_func.h.

◆ PACK_RGB24_32_STEP1

#define PACK_RGB24_32_STEP1 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
RGB1 = _mm_packus_epi16(_mm_and_si128(R1,_mm_set1_epi16(0xFF)), _mm_and_si128(R2,_mm_set1_epi16(0xFF))); \
RGB2 = _mm_packus_epi16(_mm_and_si128(G1,_mm_set1_epi16(0xFF)), _mm_and_si128(G2,_mm_set1_epi16(0xFF))); \
RGB3 = _mm_packus_epi16(_mm_and_si128(B1,_mm_set1_epi16(0xFF)), _mm_and_si128(B2,_mm_set1_epi16(0xFF))); \
RGB4 = _mm_packus_epi16(_mm_srli_epi16(R1,8), _mm_srli_epi16(R2,8)); \
RGB5 = _mm_packus_epi16(_mm_srli_epi16(G1,8), _mm_srli_epi16(G2,8)); \
RGB6 = _mm_packus_epi16(_mm_srli_epi16(B1,8), _mm_srli_epi16(B2,8)); \

Definition at line 78 of file yuv_rgb_sse_func.h.

◆ PACK_RGB24_32_STEP2

#define PACK_RGB24_32_STEP2 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
R1 = _mm_packus_epi16(_mm_and_si128(RGB1,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB2,_mm_set1_epi16(0xFF))); \
R2 = _mm_packus_epi16(_mm_and_si128(RGB3,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB4,_mm_set1_epi16(0xFF))); \
G1 = _mm_packus_epi16(_mm_and_si128(RGB5,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB6,_mm_set1_epi16(0xFF))); \
G2 = _mm_packus_epi16(_mm_srli_epi16(RGB1,8), _mm_srli_epi16(RGB2,8)); \
B1 = _mm_packus_epi16(_mm_srli_epi16(RGB3,8), _mm_srli_epi16(RGB4,8)); \
B2 = _mm_packus_epi16(_mm_srli_epi16(RGB5,8), _mm_srli_epi16(RGB6,8)); \

Definition at line 86 of file yuv_rgb_sse_func.h.

◆ PACK_RGB565_32

#define PACK_RGB565_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4 
)
Value:
{ \
__m128i red_mask, tmp1, tmp2, tmp3, tmp4; \
\
red_mask = _mm_set1_epi16((short)0xF800); \
RGB1 = _mm_and_si128(_mm_unpacklo_epi8(_mm_setzero_si128(), R1), red_mask); \
RGB2 = _mm_and_si128(_mm_unpackhi_epi8(_mm_setzero_si128(), R1), red_mask); \
RGB3 = _mm_and_si128(_mm_unpacklo_epi8(_mm_setzero_si128(), R2), red_mask); \
RGB4 = _mm_and_si128(_mm_unpackhi_epi8(_mm_setzero_si128(), R2), red_mask); \
tmp1 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpacklo_epi8(G1, _mm_setzero_si128()), 2), 5); \
tmp2 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpackhi_epi8(G1, _mm_setzero_si128()), 2), 5); \
tmp3 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpacklo_epi8(G2, _mm_setzero_si128()), 2), 5); \
tmp4 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpackhi_epi8(G2, _mm_setzero_si128()), 2), 5); \
RGB1 = _mm_or_si128(RGB1, tmp1); \
RGB2 = _mm_or_si128(RGB2, tmp2); \
RGB3 = _mm_or_si128(RGB3, tmp3); \
RGB4 = _mm_or_si128(RGB4, tmp4); \
tmp1 = _mm_srli_epi16(_mm_unpacklo_epi8(B1, _mm_setzero_si128()), 3); \
tmp2 = _mm_srli_epi16(_mm_unpackhi_epi8(B1, _mm_setzero_si128()), 3); \
tmp3 = _mm_srli_epi16(_mm_unpacklo_epi8(B2, _mm_setzero_si128()), 3); \
tmp4 = _mm_srli_epi16(_mm_unpackhi_epi8(B2, _mm_setzero_si128()), 3); \
RGB1 = _mm_or_si128(RGB1, tmp1); \
RGB2 = _mm_or_si128(RGB2, tmp2); \
RGB3 = _mm_or_si128(RGB3, tmp3); \
RGB4 = _mm_or_si128(RGB4, tmp4); \
}

Definition at line 51 of file yuv_rgb_sse_func.h.

◆ PACK_RGBA_32

#define PACK_RGBA_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  A1,
  A2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6,
  RGB7,
  RGB8 
)
Value:
{ \
__m128i lo_ab, hi_ab, lo_gr, hi_gr; \
\
lo_ab = _mm_unpacklo_epi8( A1, B1 ); \
hi_ab = _mm_unpackhi_epi8( A1, B1 ); \
lo_gr = _mm_unpacklo_epi8( G1, R1 ); \
hi_gr = _mm_unpackhi_epi8( G1, R1 ); \
RGB1 = _mm_unpacklo_epi16( lo_ab, lo_gr ); \
RGB2 = _mm_unpackhi_epi16( lo_ab, lo_gr ); \
RGB3 = _mm_unpacklo_epi16( hi_ab, hi_gr ); \
RGB4 = _mm_unpackhi_epi16( hi_ab, hi_gr ); \
\
lo_ab = _mm_unpacklo_epi8( A2, B2 ); \
hi_ab = _mm_unpackhi_epi8( A2, B2 ); \
lo_gr = _mm_unpacklo_epi8( G2, R2 ); \
hi_gr = _mm_unpackhi_epi8( G2, R2 ); \
RGB5 = _mm_unpacklo_epi16( lo_ab, lo_gr ); \
RGB6 = _mm_unpackhi_epi16( lo_ab, lo_gr ); \
RGB7 = _mm_unpacklo_epi16( hi_ab, hi_gr ); \
RGB8 = _mm_unpackhi_epi16( hi_ab, hi_gr ); \
}

Definition at line 101 of file yuv_rgb_sse_func.h.

◆ READ_UV

#define READ_UV
Value:
u = LOAD_SI128((const __m128i*)(u_ptr)); \
v = LOAD_SI128((const __m128i*)(v_ptr)); \
#define LOAD_SI128

Definition at line 255 of file yuv_rgb_sse_func.h.

◆ READ_Y

#define READ_Y (   y_ptr)    y = LOAD_SI128((const __m128i*)(y_ptr)); \

Definition at line 252 of file yuv_rgb_sse_func.h.

◆ SAVE_LINE1

#define SAVE_LINE1
Value:
SAVE_SI128((__m128i*)(rgb_ptr1), rgb_1); \
SAVE_SI128((__m128i*)(rgb_ptr1+16), rgb_2); \
SAVE_SI128((__m128i*)(rgb_ptr1+32), rgb_3); \
SAVE_SI128((__m128i*)(rgb_ptr1+48), rgb_4); \
#define SAVE_SI128

Definition at line 193 of file yuv_rgb_sse_func.h.

Referenced by SSE_FUNCTION_NAME().

◆ SAVE_LINE2

#define SAVE_LINE2
Value:
SAVE_SI128((__m128i*)(rgb_ptr2), rgb_5); \
SAVE_SI128((__m128i*)(rgb_ptr2+16), rgb_6); \
SAVE_SI128((__m128i*)(rgb_ptr2+32), rgb_7); \
SAVE_SI128((__m128i*)(rgb_ptr2+48), rgb_8); \
#define SAVE_SI128

Definition at line 199 of file yuv_rgb_sse_func.h.

Referenced by SSE_FUNCTION_NAME().

◆ SAVE_SI128

#define SAVE_SI128   _mm_storeu_si128

Definition at line 24 of file yuv_rgb_sse_func.h.

◆ UV2RGB_16

#define UV2RGB_16 (   U,
  V,
  R1,
  G1,
  B1,
  R2,
  G2,
  B2 
)
Value:
r_tmp = _mm_mullo_epi16(V, _mm_set1_epi16(param->v_r_factor)); \
g_tmp = _mm_add_epi16( \
_mm_mullo_epi16(U, _mm_set1_epi16(param->u_g_factor)), \
_mm_mullo_epi16(V, _mm_set1_epi16(param->v_g_factor))); \
b_tmp = _mm_mullo_epi16(U, _mm_set1_epi16(param->u_b_factor)); \
R1 = _mm_unpacklo_epi16(r_tmp, r_tmp); \
G1 = _mm_unpacklo_epi16(g_tmp, g_tmp); \
B1 = _mm_unpacklo_epi16(b_tmp, b_tmp); \
R2 = _mm_unpackhi_epi16(r_tmp, r_tmp); \
G2 = _mm_unpackhi_epi16(g_tmp, g_tmp); \
B2 = _mm_unpackhi_epi16(b_tmp, b_tmp); \
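
Cross-references for the value above (Doxygen link tooltips, not part of the macro body):
#define V(value) (linked by name only; inside UV2RGB_16, V is the macro's own second parameter)
Definition: yuv_rgb.c:35
GLfloat param (linked by name only; the macro dereferences param->v_r_factor, param->u_g_factor, param->v_g_factor and param->u_b_factor, so the param it expects is a pointer to a structure, not a GLfloat)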

Definition at line 27 of file yuv_rgb_sse_func.h.

◆ YUV2RGB_32

#define YUV2RGB_32

Definition at line 304 of file yuv_rgb_sse_func.h.

Referenced by SSE_FUNCTION_NAME().

Function Documentation

◆ SSE_FUNCTION_NAME()

void SSE_FUNCTION_NAME ( uint32_t  width,
uint32_t  height,
const uint8_t *  Y,
const uint8_t *  U,
const uint8_t *  V,
uint32_t  Y_stride,
uint32_t  UV_stride,
uint8_t *  RGB,
uint32_t  RGB_stride,
YCbCrType  yuv_type 
)

Definition at line 385 of file yuv_rgb_sse_func.h.

References PACK_PIXEL, SAVE_LINE1, SAVE_LINE2, STD_FUNCTION_NAME(), YUV2RGB, and YUV2RGB_32.

389 {
390  const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]);
391 #if YUV_FORMAT == YUV_FORMAT_420
392  const int y_pixel_stride = 1;
393  const int uv_pixel_stride = 1;
394  const int uv_x_sample_interval = 2;
395  const int uv_y_sample_interval = 2;
396 #elif YUV_FORMAT == YUV_FORMAT_422
397  const int y_pixel_stride = 2;
398  const int uv_pixel_stride = 4;
399  const int uv_x_sample_interval = 2;
400  const int uv_y_sample_interval = 1;
401 #elif YUV_FORMAT == YUV_FORMAT_NV12
402  const int y_pixel_stride = 1;
403  const int uv_pixel_stride = 2;
404  const int uv_x_sample_interval = 2;
405  const int uv_y_sample_interval = 2;
406 #endif
407 #if RGB_FORMAT == RGB_FORMAT_RGB565
408  const int rgb_pixel_stride = 2;
409 #elif RGB_FORMAT == RGB_FORMAT_RGB24
410  const int rgb_pixel_stride = 3;
411 #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT == RGB_FORMAT_BGRA || \
412  RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT == RGB_FORMAT_ABGR
413  const int rgb_pixel_stride = 4;
414 #else
415 #error Unknown RGB pixel size
416 #endif
417 
418  if (width >= 32) {
419  uint32_t xpos, ypos;
420  for(ypos=0; ypos<(height-(uv_y_sample_interval-1)); ypos+=uv_y_sample_interval)
421  {
422  const uint8_t *y_ptr1=Y+ypos*Y_stride,
423  *y_ptr2=Y+(ypos+1)*Y_stride,
424  *u_ptr=U+(ypos/uv_y_sample_interval)*UV_stride,
425  *v_ptr=V+(ypos/uv_y_sample_interval)*UV_stride;
426 
427  uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
428  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
429 
430  for(xpos=0; xpos<(width-31); xpos+=32)
431  {
432  YUV2RGB_32
433  {
434  PACK_PIXEL
435  SAVE_LINE1
436  if (uv_y_sample_interval > 1)
437  {
438  SAVE_LINE2
439  }
440  }
441 
442  y_ptr1+=32*y_pixel_stride;
443  y_ptr2+=32*y_pixel_stride;
444  u_ptr+=32*uv_pixel_stride/uv_x_sample_interval;
445  v_ptr+=32*uv_pixel_stride/uv_x_sample_interval;
446  rgb_ptr1+=32*rgb_pixel_stride;
447  rgb_ptr2+=32*rgb_pixel_stride;
448  }
449  }
450 
451  /* Catch the last line, if needed */
452  if (uv_y_sample_interval == 2 && ypos == (height-1))
453  {
454  const uint8_t *y_ptr=Y+ypos*Y_stride,
455  *u_ptr=U+(ypos/uv_y_sample_interval)*UV_stride,
456  *v_ptr=V+(ypos/uv_y_sample_interval)*UV_stride;
457 
458  uint8_t *rgb_ptr=RGB+ypos*RGB_stride;
459 
460  STD_FUNCTION_NAME(width, 1, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
461  }
462  }
463 
464  /* Catch the right column, if needed */
465  {
466  int converted = (width & ~31);
467  if (converted != width)
468  {
469  const uint8_t *y_ptr=Y+converted*y_pixel_stride,
470  *u_ptr=U+converted*uv_pixel_stride/uv_x_sample_interval,
471  *v_ptr=V+converted*uv_pixel_stride/uv_x_sample_interval;
472 
473  uint8_t *rgb_ptr=RGB+converted*rgb_pixel_stride;
474 
475  STD_FUNCTION_NAME(width-converted, height, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
476  }
477  }
478 }
Definition: edid.h:20
#define SAVE_LINE2
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
#define YUV2RGB_32
#define PACK_PIXEL
void STD_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:42
unsigned char uint8_t
unsigned int uint32_t
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
#define V(value)
Definition: yuv_rgb.c:35
GLfloat param
#define SAVE_LINE1