SDL  2.0
yuv_rgb.c
Go to the documentation of this file.
1 // Copyright 2016 Adrien Descamps
2 // Distributed under BSD 3-Clause License
3 #include "../../SDL_internal.h"
4 
5 #include "yuv_rgb.h"
6 
7 #include "SDL_cpuinfo.h"
8 /*#include <x86intrin.h>*/
9 
// Number of fractional bits carried by the fixed-point conversion coefficients.
#define PRECISION 6
// Fixed-point scale factor (2^PRECISION): a real coefficient c is stored as c*PRECISION_FACTOR.
#define PRECISION_FACTOR (1<<PRECISION)
12 
// Fixed-point parameters of one RGB -> YUV conversion.
typedef struct
{
	// offset added to the luma value (0 or 16 in the tables of this file)
	uint8_t y_shift;
	// conversion matrix, every entry scaled by PRECISION_FACTOR
	int16_t matrix[3][3];
} RGB2YUVParam;
// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
21 
// Fixed-point parameters of one YUV -> RGB conversion.
// Member order matters: the YUV2RGB table initializes this struct positionally.
typedef struct
{
	// offset subtracted from the luma value before scaling
	uint8_t y_shift;
	// luma gain, shared by the three output channels
	int16_t y_factor;
	// contribution of V to red
	int16_t v_r_factor;
	// contribution of U to green
	int16_t u_g_factor;
	// contribution of V to green
	int16_t v_g_factor;
	// contribution of U to blue
	int16_t u_b_factor;
} YUV2RGBParam;
// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
34 
// Convert a real coefficient to fixed point: value*PRECISION_FACTOR, rounded half up.
// The +0.5 / truncating cast only rounds correctly for non-negative values, which is
// why the tables write negative coefficients as -V(x) rather than V(-x).
// The argument and the whole expansion are parenthesized so that V(a+b) and
// expressions built around V(...) evaluate as written.
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
36 
37 // for ITU-T T.871, values can be found in section 7
38 // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
39 // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
40 // all values are rounded to the fourth decimal
41 
// YUV -> RGB coefficient sets, one entry per supported standard, in the order
// ITU-T T.871 (JPEG), ITU-R BT.601-7, ITU-R BT.709-6.
// The initializers are positional; the /*.name=*/ markers only document which
// YUV2RGBParam member each value is meant for, so the order must follow the struct.
// Every factor goes through V(), i.e. it is stored scaled by PRECISION_FACTOR;
// y_shift is a plain integer.
// NOTE(review): presumably indexed by YCbCrType in the included conversion
// templates, the same way RGB2YUV is indexed by yuv_type - confirm there.
42 static const YUV2RGBParam YUV2RGB[3] = {
43  // ITU-T T.871 (JPEG)
44  {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
45  // ITU-R BT.601-7
46  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
47  // ITU-R BT.709-6
48  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
49 };
50 
// RGB -> YUV coefficient sets, in the order ITU-T T.871 (JPEG), ITU-R BT.601-7,
// ITU-R BT.709-6. The rgb24_yuv420_* functions pick an entry with RGB2YUV[yuv_type].
// Each entry is {y_shift, matrix}: matrix row 0 produces Y, row 1 produces U and
// row 2 produces V; the columns multiply R, G and B in that order.
// Matrix entries go through V(), i.e. they are stored scaled by PRECISION_FACTOR;
// negative coefficients are written as -V(x).
51 static const RGB2YUVParam RGB2YUV[3] = {
52  // ITU-T T.871 (JPEG)
53  {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
54  // ITU-R BT.601-7
55  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
56  // ITU-R BT.709-6
57  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
58 };
59 
/* The various layouts of YUV data we support */
/* One of these is assigned to YUV_FORMAT before each template include below. */
#define YUV_FORMAT_420 1
#define YUV_FORMAT_422 2
#define YUV_FORMAT_NV12 3

/* The various formats of RGB pixel that we support */
/* One of these is assigned to RGB_FORMAT before each template include below. */
#define RGB_FORMAT_RGB565 1
#define RGB_FORMAT_RGB24 2
#define RGB_FORMAT_RGBA 3
#define RGB_FORMAT_BGRA 4
#define RGB_FORMAT_ARGB 5
#define RGB_FORMAT_ABGR 6
72 
73 // divide by PRECISION_FACTOR and clamp to [0:255] interval
74 // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
76 {
77  static const uint8_t lut[512] =
78  {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
81  47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
82  91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
83  126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
84  159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
85  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
86  225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
87  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
88  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
89  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
91  };
92  return lut[(v+128*PRECISION_FACTOR)>>PRECISION];
93 }
94 
95 
// Scalar ("std") converter instantiations.
// Each group sets STD_FUNCTION_NAME, YUV_FORMAT and RGB_FORMAT and then includes
// yuv_rgb_std_func.h again: 3 YUV layouts x 6 RGB pixel formats = 18 includes.
// NOTE(review): presumably the header defines one function named STD_FUNCTION_NAME
// per include and #undefs the three macros afterwards (they are re-#defined here
// without an #undef) - confirm in yuv_rgb_std_func.h.

// --- YUV_FORMAT_420 sources ---
96 #define STD_FUNCTION_NAME yuv420_rgb565_std
97 #define YUV_FORMAT YUV_FORMAT_420
98 #define RGB_FORMAT RGB_FORMAT_RGB565
99 #include "yuv_rgb_std_func.h"
100 
101 #define STD_FUNCTION_NAME yuv420_rgb24_std
102 #define YUV_FORMAT YUV_FORMAT_420
103 #define RGB_FORMAT RGB_FORMAT_RGB24
104 #include "yuv_rgb_std_func.h"
105 
106 #define STD_FUNCTION_NAME yuv420_rgba_std
107 #define YUV_FORMAT YUV_FORMAT_420
108 #define RGB_FORMAT RGB_FORMAT_RGBA
109 #include "yuv_rgb_std_func.h"
110 
111 #define STD_FUNCTION_NAME yuv420_bgra_std
112 #define YUV_FORMAT YUV_FORMAT_420
113 #define RGB_FORMAT RGB_FORMAT_BGRA
114 #include "yuv_rgb_std_func.h"
115 
116 #define STD_FUNCTION_NAME yuv420_argb_std
117 #define YUV_FORMAT YUV_FORMAT_420
118 #define RGB_FORMAT RGB_FORMAT_ARGB
119 #include "yuv_rgb_std_func.h"
120 
121 #define STD_FUNCTION_NAME yuv420_abgr_std
122 #define YUV_FORMAT YUV_FORMAT_420
123 #define RGB_FORMAT RGB_FORMAT_ABGR
124 #include "yuv_rgb_std_func.h"
125 
// --- YUV_FORMAT_422 sources ---
126 #define STD_FUNCTION_NAME yuv422_rgb565_std
127 #define YUV_FORMAT YUV_FORMAT_422
128 #define RGB_FORMAT RGB_FORMAT_RGB565
129 #include "yuv_rgb_std_func.h"
130 
131 #define STD_FUNCTION_NAME yuv422_rgb24_std
132 #define YUV_FORMAT YUV_FORMAT_422
133 #define RGB_FORMAT RGB_FORMAT_RGB24
134 #include "yuv_rgb_std_func.h"
135 
136 #define STD_FUNCTION_NAME yuv422_rgba_std
137 #define YUV_FORMAT YUV_FORMAT_422
138 #define RGB_FORMAT RGB_FORMAT_RGBA
139 #include "yuv_rgb_std_func.h"
140 
141 #define STD_FUNCTION_NAME yuv422_bgra_std
142 #define YUV_FORMAT YUV_FORMAT_422
143 #define RGB_FORMAT RGB_FORMAT_BGRA
144 #include "yuv_rgb_std_func.h"
145 
146 #define STD_FUNCTION_NAME yuv422_argb_std
147 #define YUV_FORMAT YUV_FORMAT_422
148 #define RGB_FORMAT RGB_FORMAT_ARGB
149 #include "yuv_rgb_std_func.h"
150 
151 #define STD_FUNCTION_NAME yuv422_abgr_std
152 #define YUV_FORMAT YUV_FORMAT_422
153 #define RGB_FORMAT RGB_FORMAT_ABGR
154 #include "yuv_rgb_std_func.h"
155 
// --- YUV_FORMAT_NV12 sources ---
156 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
157 #define YUV_FORMAT YUV_FORMAT_NV12
158 #define RGB_FORMAT RGB_FORMAT_RGB565
159 #include "yuv_rgb_std_func.h"
160 
161 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
162 #define YUV_FORMAT YUV_FORMAT_NV12
163 #define RGB_FORMAT RGB_FORMAT_RGB24
164 #include "yuv_rgb_std_func.h"
165 
166 #define STD_FUNCTION_NAME yuvnv12_rgba_std
167 #define YUV_FORMAT YUV_FORMAT_NV12
168 #define RGB_FORMAT RGB_FORMAT_RGBA
169 #include "yuv_rgb_std_func.h"
170 
171 #define STD_FUNCTION_NAME yuvnv12_bgra_std
172 #define YUV_FORMAT YUV_FORMAT_NV12
173 #define RGB_FORMAT RGB_FORMAT_BGRA
174 #include "yuv_rgb_std_func.h"
175 
176 #define STD_FUNCTION_NAME yuvnv12_argb_std
177 #define YUV_FORMAT YUV_FORMAT_NV12
178 #define RGB_FORMAT RGB_FORMAT_ARGB
179 #include "yuv_rgb_std_func.h"
180 
181 #define STD_FUNCTION_NAME yuvnv12_abgr_std
182 #define YUV_FORMAT YUV_FORMAT_NV12
183 #define RGB_FORMAT RGB_FORMAT_ABGR
184 #include "yuv_rgb_std_func.h"
185 
188  const uint8_t *RGB, uint32_t RGB_stride,
189  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
190  YCbCrType yuv_type)
191 {
192  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
193 
194  uint32_t x, y;
195  for(y=0; y<(height-1); y+=2)
196  {
197  const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
198  *rgb_ptr2=RGB+(y+1)*RGB_stride;
199 
200  uint8_t *y_ptr1=Y+y*Y_stride,
201  *y_ptr2=Y+(y+1)*Y_stride,
202  *u_ptr=U+(y/2)*UV_stride,
203  *v_ptr=V+(y/2)*UV_stride;
204 
205  for(x=0; x<(width-1); x+=2)
206  {
207  // compute yuv for the four pixels, u and v values are summed
208  int32_t y_tmp, u_tmp, v_tmp;
209 
210  y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
211  u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
212  v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
213  y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
214 
215  y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
216  u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
217  v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
218  y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
219 
220  y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
221  u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
222  v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
223  y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
224 
225  y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
226  u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
227  v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
228  y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
229 
230  u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
231  v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
232 
233  rgb_ptr1 += 6;
234  rgb_ptr2 += 6;
235  y_ptr1 += 2;
236  y_ptr2 += 2;
237  u_ptr += 1;
238  v_ptr += 1;
239  }
240  }
241 }
242 
243 #ifdef __SSE2__
244 
// SSE2 converter instantiations (only compiled when __SSE2__ is defined).
// Each group sets SSE_FUNCTION_NAME, STD_FUNCTION_NAME, YUV_FORMAT and RGB_FORMAT
// and then includes yuv_rgb_sse_func.h again. Every YUV layout / RGB format pair
// is instantiated twice: a "_sse" variant with SSE_ALIGNED defined and a "_sseu"
// variant without it (3 layouts x 6 formats x 2 = 36 includes).
// STD_FUNCTION_NAME names the scalar function instantiated earlier in this file
// for the same pair.
// NOTE(review): presumably SSE_ALIGNED selects aligned loads/stores, the scalar
// function is used as a fallback, and the header #undefs all of these macros after
// each include (SSE_ALIGNED in particular must not leak into the following
// "_sseu" group) - confirm in yuv_rgb_sse_func.h.

// --- YUV_FORMAT_420 sources ---
245 #define SSE_FUNCTION_NAME yuv420_rgb565_sse
246 #define STD_FUNCTION_NAME yuv420_rgb565_std
247 #define YUV_FORMAT YUV_FORMAT_420
248 #define RGB_FORMAT RGB_FORMAT_RGB565
249 #define SSE_ALIGNED
250 #include "yuv_rgb_sse_func.h"
251 
252 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu
253 #define STD_FUNCTION_NAME yuv420_rgb565_std
254 #define YUV_FORMAT YUV_FORMAT_420
255 #define RGB_FORMAT RGB_FORMAT_RGB565
256 #include "yuv_rgb_sse_func.h"
257 
258 #define SSE_FUNCTION_NAME yuv420_rgb24_sse
259 #define STD_FUNCTION_NAME yuv420_rgb24_std
260 #define YUV_FORMAT YUV_FORMAT_420
261 #define RGB_FORMAT RGB_FORMAT_RGB24
262 #define SSE_ALIGNED
263 #include "yuv_rgb_sse_func.h"
264 
265 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu
266 #define STD_FUNCTION_NAME yuv420_rgb24_std
267 #define YUV_FORMAT YUV_FORMAT_420
268 #define RGB_FORMAT RGB_FORMAT_RGB24
269 #include "yuv_rgb_sse_func.h"
270 
271 #define SSE_FUNCTION_NAME yuv420_rgba_sse
272 #define STD_FUNCTION_NAME yuv420_rgba_std
273 #define YUV_FORMAT YUV_FORMAT_420
274 #define RGB_FORMAT RGB_FORMAT_RGBA
275 #define SSE_ALIGNED
276 #include "yuv_rgb_sse_func.h"
277 
278 #define SSE_FUNCTION_NAME yuv420_rgba_sseu
279 #define STD_FUNCTION_NAME yuv420_rgba_std
280 #define YUV_FORMAT YUV_FORMAT_420
281 #define RGB_FORMAT RGB_FORMAT_RGBA
282 #include "yuv_rgb_sse_func.h"
283 
284 #define SSE_FUNCTION_NAME yuv420_bgra_sse
285 #define STD_FUNCTION_NAME yuv420_bgra_std
286 #define YUV_FORMAT YUV_FORMAT_420
287 #define RGB_FORMAT RGB_FORMAT_BGRA
288 #define SSE_ALIGNED
289 #include "yuv_rgb_sse_func.h"
290 
291 #define SSE_FUNCTION_NAME yuv420_bgra_sseu
292 #define STD_FUNCTION_NAME yuv420_bgra_std
293 #define YUV_FORMAT YUV_FORMAT_420
294 #define RGB_FORMAT RGB_FORMAT_BGRA
295 #include "yuv_rgb_sse_func.h"
296 
297 #define SSE_FUNCTION_NAME yuv420_argb_sse
298 #define STD_FUNCTION_NAME yuv420_argb_std
299 #define YUV_FORMAT YUV_FORMAT_420
300 #define RGB_FORMAT RGB_FORMAT_ARGB
301 #define SSE_ALIGNED
302 #include "yuv_rgb_sse_func.h"
303 
304 #define SSE_FUNCTION_NAME yuv420_argb_sseu
305 #define STD_FUNCTION_NAME yuv420_argb_std
306 #define YUV_FORMAT YUV_FORMAT_420
307 #define RGB_FORMAT RGB_FORMAT_ARGB
308 #include "yuv_rgb_sse_func.h"
309 
310 #define SSE_FUNCTION_NAME yuv420_abgr_sse
311 #define STD_FUNCTION_NAME yuv420_abgr_std
312 #define YUV_FORMAT YUV_FORMAT_420
313 #define RGB_FORMAT RGB_FORMAT_ABGR
314 #define SSE_ALIGNED
315 #include "yuv_rgb_sse_func.h"
316 
317 #define SSE_FUNCTION_NAME yuv420_abgr_sseu
318 #define STD_FUNCTION_NAME yuv420_abgr_std
319 #define YUV_FORMAT YUV_FORMAT_420
320 #define RGB_FORMAT RGB_FORMAT_ABGR
321 #include "yuv_rgb_sse_func.h"
322 
// --- YUV_FORMAT_422 sources ---
323 #define SSE_FUNCTION_NAME yuv422_rgb565_sse
324 #define STD_FUNCTION_NAME yuv422_rgb565_std
325 #define YUV_FORMAT YUV_FORMAT_422
326 #define RGB_FORMAT RGB_FORMAT_RGB565
327 #define SSE_ALIGNED
328 #include "yuv_rgb_sse_func.h"
329 
330 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu
331 #define STD_FUNCTION_NAME yuv422_rgb565_std
332 #define YUV_FORMAT YUV_FORMAT_422
333 #define RGB_FORMAT RGB_FORMAT_RGB565
334 #include "yuv_rgb_sse_func.h"
335 
336 #define SSE_FUNCTION_NAME yuv422_rgb24_sse
337 #define STD_FUNCTION_NAME yuv422_rgb24_std
338 #define YUV_FORMAT YUV_FORMAT_422
339 #define RGB_FORMAT RGB_FORMAT_RGB24
340 #define SSE_ALIGNED
341 #include "yuv_rgb_sse_func.h"
342 
343 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu
344 #define STD_FUNCTION_NAME yuv422_rgb24_std
345 #define YUV_FORMAT YUV_FORMAT_422
346 #define RGB_FORMAT RGB_FORMAT_RGB24
347 #include "yuv_rgb_sse_func.h"
348 
349 #define SSE_FUNCTION_NAME yuv422_rgba_sse
350 #define STD_FUNCTION_NAME yuv422_rgba_std
351 #define YUV_FORMAT YUV_FORMAT_422
352 #define RGB_FORMAT RGB_FORMAT_RGBA
353 #define SSE_ALIGNED
354 #include "yuv_rgb_sse_func.h"
355 
356 #define SSE_FUNCTION_NAME yuv422_rgba_sseu
357 #define STD_FUNCTION_NAME yuv422_rgba_std
358 #define YUV_FORMAT YUV_FORMAT_422
359 #define RGB_FORMAT RGB_FORMAT_RGBA
360 #include "yuv_rgb_sse_func.h"
361 
362 #define SSE_FUNCTION_NAME yuv422_bgra_sse
363 #define STD_FUNCTION_NAME yuv422_bgra_std
364 #define YUV_FORMAT YUV_FORMAT_422
365 #define RGB_FORMAT RGB_FORMAT_BGRA
366 #define SSE_ALIGNED
367 #include "yuv_rgb_sse_func.h"
368 
369 #define SSE_FUNCTION_NAME yuv422_bgra_sseu
370 #define STD_FUNCTION_NAME yuv422_bgra_std
371 #define YUV_FORMAT YUV_FORMAT_422
372 #define RGB_FORMAT RGB_FORMAT_BGRA
373 #include "yuv_rgb_sse_func.h"
374 
375 #define SSE_FUNCTION_NAME yuv422_argb_sse
376 #define STD_FUNCTION_NAME yuv422_argb_std
377 #define YUV_FORMAT YUV_FORMAT_422
378 #define RGB_FORMAT RGB_FORMAT_ARGB
379 #define SSE_ALIGNED
380 #include "yuv_rgb_sse_func.h"
381 
382 #define SSE_FUNCTION_NAME yuv422_argb_sseu
383 #define STD_FUNCTION_NAME yuv422_argb_std
384 #define YUV_FORMAT YUV_FORMAT_422
385 #define RGB_FORMAT RGB_FORMAT_ARGB
386 #include "yuv_rgb_sse_func.h"
387 
388 #define SSE_FUNCTION_NAME yuv422_abgr_sse
389 #define STD_FUNCTION_NAME yuv422_abgr_std
390 #define YUV_FORMAT YUV_FORMAT_422
391 #define RGB_FORMAT RGB_FORMAT_ABGR
392 #define SSE_ALIGNED
393 #include "yuv_rgb_sse_func.h"
394 
395 #define SSE_FUNCTION_NAME yuv422_abgr_sseu
396 #define STD_FUNCTION_NAME yuv422_abgr_std
397 #define YUV_FORMAT YUV_FORMAT_422
398 #define RGB_FORMAT RGB_FORMAT_ABGR
399 #include "yuv_rgb_sse_func.h"
400 
// --- YUV_FORMAT_NV12 sources ---
401 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
402 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
403 #define YUV_FORMAT YUV_FORMAT_NV12
404 #define RGB_FORMAT RGB_FORMAT_RGB565
405 #define SSE_ALIGNED
406 #include "yuv_rgb_sse_func.h"
407 
408 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
409 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
410 #define YUV_FORMAT YUV_FORMAT_NV12
411 #define RGB_FORMAT RGB_FORMAT_RGB565
412 #include "yuv_rgb_sse_func.h"
413 
414 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
415 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
416 #define YUV_FORMAT YUV_FORMAT_NV12
417 #define RGB_FORMAT RGB_FORMAT_RGB24
418 #define SSE_ALIGNED
419 #include "yuv_rgb_sse_func.h"
420 
421 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
422 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
423 #define YUV_FORMAT YUV_FORMAT_NV12
424 #define RGB_FORMAT RGB_FORMAT_RGB24
425 #include "yuv_rgb_sse_func.h"
426 
427 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse
428 #define STD_FUNCTION_NAME yuvnv12_rgba_std
429 #define YUV_FORMAT YUV_FORMAT_NV12
430 #define RGB_FORMAT RGB_FORMAT_RGBA
431 #define SSE_ALIGNED
432 #include "yuv_rgb_sse_func.h"
433 
434 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
435 #define STD_FUNCTION_NAME yuvnv12_rgba_std
436 #define YUV_FORMAT YUV_FORMAT_NV12
437 #define RGB_FORMAT RGB_FORMAT_RGBA
438 #include "yuv_rgb_sse_func.h"
439 
440 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse
441 #define STD_FUNCTION_NAME yuvnv12_bgra_std
442 #define YUV_FORMAT YUV_FORMAT_NV12
443 #define RGB_FORMAT RGB_FORMAT_BGRA
444 #define SSE_ALIGNED
445 #include "yuv_rgb_sse_func.h"
446 
447 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
448 #define STD_FUNCTION_NAME yuvnv12_bgra_std
449 #define YUV_FORMAT YUV_FORMAT_NV12
450 #define RGB_FORMAT RGB_FORMAT_BGRA
451 #include "yuv_rgb_sse_func.h"
452 
453 #define SSE_FUNCTION_NAME yuvnv12_argb_sse
454 #define STD_FUNCTION_NAME yuvnv12_argb_std
455 #define YUV_FORMAT YUV_FORMAT_NV12
456 #define RGB_FORMAT RGB_FORMAT_ARGB
457 #define SSE_ALIGNED
458 #include "yuv_rgb_sse_func.h"
459 
460 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu
461 #define STD_FUNCTION_NAME yuvnv12_argb_std
462 #define YUV_FORMAT YUV_FORMAT_NV12
463 #define RGB_FORMAT RGB_FORMAT_ARGB
464 #include "yuv_rgb_sse_func.h"
465 
466 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse
467 #define STD_FUNCTION_NAME yuvnv12_abgr_std
468 #define YUV_FORMAT YUV_FORMAT_NV12
469 #define RGB_FORMAT RGB_FORMAT_ABGR
470 #define SSE_ALIGNED
471 #include "yuv_rgb_sse_func.h"
472 
473 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
474 #define STD_FUNCTION_NAME yuvnv12_abgr_std
475 #define YUV_FORMAT YUV_FORMAT_NV12
476 #define RGB_FORMAT RGB_FORMAT_ABGR
477 #include "yuv_rgb_sse_func.h"
478 
479 
// De-interleave packed RGB24 data held in six 128-bit registers.
//
// Input : RGB1..RGB3 hold 16 consecutive pixels (48 bytes) of one row,
//         RGB4..RGB6 hold 16 consecutive pixels of a second row.
// Output: R1/G1/B1 hold the 16 red/green/blue bytes of the first row,
//         R2/G2/B2 those of the second row. RGB1..RGB6 are overwritten.
//
// Each STEP interleaves register k with register k+3 of the six-register
// sequence, which moves byte p of the 96-byte sequence to position 2p mod 95.
// Five such passes move p to 32p mod 95, so output byte q is input byte
// 3q mod 95: every third byte ends up contiguous.
//
// All arguments must be plain __m128i variables (they are assigned to). The
// macros expand to complete statements, including the final ';'.
#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
B2 = _mm_unpackhi_epi8(RGB3, RGB6);

#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
RGB1 = _mm_unpacklo_epi8(R1, G2); \
RGB2 = _mm_unpackhi_epi8(R1, G2); \
RGB3 = _mm_unpacklo_epi8(R2, B1); \
RGB4 = _mm_unpackhi_epi8(R2, B1); \
RGB5 = _mm_unpacklo_epi8(G1, B2); \
RGB6 = _mm_unpackhi_epi8(G1, B2);

#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2)
502 
// Convert eight pixels from RGB to YUV with 16-bit fixed-point arithmetic.
//
// R, G, B : __m128i holding eight 16-bit channel values (0..255 each)
// Y, U, V : __m128i variables receiving the eight 16-bit results
//
// Requires a variable named "param" in scope, pointing to the coefficient set
// (members y_shift and matrix), and the PRECISION macro. Each output is
// matrix row * (R,G,B) + offset, arithmetically shifted right by PRECISION;
// the offset is y_shift for Y and 128 for U and V, both scaled by 2^PRECISION.
// Only the low 16 bits of every product are kept (_mm_mullo_epi16), so the
// coefficients must be small enough for each sum to fit in 16 bits.
// Expands to complete statements, including the final ';'.
#define RGB2YUV_16(R, G, B, Y, U, V) \
Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
Y = _mm_srai_epi16(Y, PRECISION); \
U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
U = _mm_srai_epi16(U, PRECISION); \
V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
V = _mm_srai_epi16(V, PRECISION);
519 
// Convert a block of 32x2 packed RGB24 pixels to YUV 4:2:0.
//
// Takes no arguments; the expansion site must provide:
//   LOAD_SI128, SAVE_SI128 : the 128-bit load / store intrinsics to use
//   rgb_ptr1, rgb_ptr2     : source rows (96 bytes are read from each)
//   y_ptr1, y_ptr2         : luma rows (32 bytes are written to each)
//   u_ptr, v_ptr           : chroma rows (16 bytes are written to each)
//   param                  : coefficient set used by RGB2YUV_16
// The macro declares its own local variables, so it can be expanded only once
// per scope. The pointers are not advanced; the caller does that.
//
// Chroma is subsampled with _mm_avg_epu8, first vertically (row 1 with row 2)
// and then horizontally (odd with even columns); each average rounds up.
#define RGB2YUV_32 \
	__m128i r1, r2, b1, b2, g1, g2; \
	__m128i r_16, g_16, b_16; \
	__m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
	__m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
		rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
		rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
		rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
		rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
		rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
	/* unpack rgb24 data to r, g and b data in separate channels*/ \
	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
	/* process pixels of first line */ \
	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u1 = _mm_packus_epi16(u1_16, u2_16); \
	v1 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr1), y); \
	/* process pixels of second line */ \
	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u2 = _mm_packus_epi16(u1_16, u2_16); \
	v2 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr2), y); \
	/* vertical subsampling of u/v values */ \
	u1_tmp = _mm_avg_epu8(u1, u2); \
	v1_tmp = _mm_avg_epu8(v1, v2); \
	/* do the same again with next data */ \
	rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
	rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
	rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
	rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
	rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
	rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
	/* unpack rgb24 data to r, g and b data in separate channels*/ \
	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
	/* process pixels of first line */ \
	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u1 = _mm_packus_epi16(u1_16, u2_16); \
	v1 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr1+16), y); \
	/* process pixels of second line */ \
	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u2 = _mm_packus_epi16(u1_16, u2_16); \
	v2 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr2+16), y); \
	/* vertical subsampling of u/v values */ \
	u2_tmp = _mm_avg_epu8(u1, u2); \
	v2_tmp = _mm_avg_epu8(v1, v2); \
	/* horizontal subsampling of u/v values */ \
	u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
	v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
	u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
	v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
	u1 = _mm_avg_epu8(u1, u2); \
	v1 = _mm_avg_epu8(v1, v2); \
	SAVE_SI128((__m128i*)(u_ptr), u1); \
	SAVE_SI128((__m128i*)(v_ptr), v1);
612 
614  const uint8_t *RGB, uint32_t RGB_stride,
615  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
616  YCbCrType yuv_type)
617 {
618  #define LOAD_SI128 _mm_load_si128
619  #define SAVE_SI128 _mm_stream_si128
620  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
621 
622  uint32_t xpos, ypos;
623  for(ypos=0; ypos<(height-1); ypos+=2)
624  {
625  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
626  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
627 
628  uint8_t *y_ptr1=Y+ypos*Y_stride,
629  *y_ptr2=Y+(ypos+1)*Y_stride,
630  *u_ptr=U+(ypos/2)*UV_stride,
631  *v_ptr=V+(ypos/2)*UV_stride;
632 
633  for(xpos=0; xpos<(width-31); xpos+=32)
634  {
635  RGB2YUV_32
636 
637  rgb_ptr1+=96;
638  rgb_ptr2+=96;
639  y_ptr1+=32;
640  y_ptr2+=32;
641  u_ptr+=16;
642  v_ptr+=16;
643  }
644  }
645  #undef LOAD_SI128
646  #undef SAVE_SI128
647 }
648 
650  const uint8_t *RGB, uint32_t RGB_stride,
651  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
652  YCbCrType yuv_type)
653 {
654  #define LOAD_SI128 _mm_loadu_si128
655  #define SAVE_SI128 _mm_storeu_si128
656  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
657 
658  uint32_t xpos, ypos;
659  for(ypos=0; ypos<(height-1); ypos+=2)
660  {
661  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
662  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
663 
664  uint8_t *y_ptr1=Y+ypos*Y_stride,
665  *y_ptr2=Y+(ypos+1)*Y_stride,
666  *u_ptr=U+(ypos/2)*UV_stride,
667  *v_ptr=V+(ypos/2)*UV_stride;
668 
669  for(xpos=0; xpos<(width-31); xpos+=32)
670  {
671  RGB2YUV_32
672 
673  rgb_ptr1+=96;
674  rgb_ptr2+=96;
675  y_ptr1+=32;
676  y_ptr2+=32;
677  u_ptr+=16;
678  v_ptr+=16;
679  }
680  }
681  #undef LOAD_SI128
682  #undef SAVE_SI128
683 }
684 
685 
686 #endif //__SSE2__
687 
GLuint GLenum matrix
Definition: edid.h:20
const GLdouble * v
Definition: SDL_opengl.h:2064
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
signed int int32_t
int16_t y_factor
Definition: yuv_rgb.c:25
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const RGB2YUVParam RGB2YUV[3]
Definition: yuv_rgb.c:51
#define PRECISION
Definition: yuv_rgb.c:10
#define PRECISION_FACTOR
Definition: yuv_rgb.c:11
int16_t v_g_factor
Definition: yuv_rgb.c:28
signed short int16_t
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
Definition: yuv_rgb.c:186
int16_t u_b_factor
Definition: yuv_rgb.c:29
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:42
GLint GLint GLint GLint GLint GLint y
Definition: SDL_opengl.h:1574
static uint8_t clampU8(int32_t v)
Definition: yuv_rgb.c:75
int16_t u_g_factor
Definition: yuv_rgb.c:27
YCbCrType
Definition: yuv_rgb.h:22
uint8_t y_shift
Definition: yuv_rgb.c:15
unsigned char uint8_t
unsigned int uint32_t
int16_t matrix[3][3]
Definition: yuv_rgb.c:16
uint8_t y_shift
Definition: yuv_rgb.c:24
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
int16_t v_r_factor
Definition: yuv_rgb.c:26
#define V(value)
Definition: yuv_rgb.c:35
GLfloat param