/*
 * File-scope 64-bit MMX constants, compiled only when USE_MMX_ASSEMBLY is
 * defined.  Each mmx_t is initialised either through .ud (two 32-bit values)
 * or .uw (four 16-bit words), so every constant repeats one 16-bit pattern
 * across all four word lanes.  They are handed to the inline assembly below
 * as "m" (memory) operands.
 */
/* Word patterns 0x0080, 0x00ff and 0xff00 replicated in every 16-bit lane. */
21 #include "../SDL_internal.h" 25 #ifdef USE_MMX_ASSEMBLY 33 static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
34 static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
35 static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
/* 0x004a = 74 in each lane; presumably the luminance (Y) scale factor -- confirm. */
37 static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
/*
 * Per-lane factors listed among the operands of the 32-bit routine:
 * 0x0072 = 114, 0x0059 = 89, 0xffea = -22 and 0xffd2 = -46 when the word is
 * read as signed.  The names suggest U->blue, V->red, U->green and V->green
 * chroma contributions -- NOTE(review): inferred from the identifiers only.
 */
39 static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
40 static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
41 static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
42 static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
/*
 * Per-lane factors listed among the operands of the 16-bit routine:
 * 0x0081 = 129, 0x0066 = 102, 0xffe8 = -24 and 0xffcd = -51 when the word is
 * read as signed.
 */
44 static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
45 static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
46 static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
47 static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
/*
 * Bit masks per 16-bit lane: 0xf800 keeps bits 15..11 and 0x07e0 keeps bits
 * 10..5, which matches the red and green fields of a 5-6-5 pixel layout
 * (presumably why they are named red565 / grn565).
 */
49 static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
50 static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
/*
 * ColorRGBDitherYV12MMX1X: MMX inline-assembly routine that reads the lum,
 * cr and cb byte planes and writes through two Uint32 row pointers derived
 * from out.
 *
 * Parameters as declared below:
 *   colortab, rgb_2_pix - lookup tables; neither appears in the part of the
 *                         asm operand list visible in this excerpt.
 *   lum, cr, cb         - input byte planes, passed to the asm in registers.
 *   out                 - output buffer, addressed as Uint32 elements.
 *   rows, cols          - image dimensions; cols is also an asm operand.
 *   mod                 - extra Uint32 elements between the end of one output
 *                         row and the start of the next (presumably; it is
 *                         added to cols when locating the second row).
 *
 * NOTE(review): this excerpt is incomplete -- the opening brace, the
 * declarations of row1 and x, most of the asm template and the end of the
 * operand list are on lines that are not shown here.
 */
81 void ColorRGBDitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
82 unsigned char *lum,
unsigned char *cr,
83 unsigned char *cb,
unsigned char *out,
84 int rows,
int cols,
int mod )
/* y is lum advanced by cols*rows bytes; it is handed to the asm as a memory operand. */
89 unsigned char*
y = lum +cols*rows;
/* row2 is out viewed as Uint32 and advanced by cols+mod elements. */
92 row2 = (
Uint32 *)out+cols+mod;
/*
 * mod is rescaled to (2*mod + cols) * 4; the factor 4 is presumably
 * sizeof(Uint32), turning an element count into a byte step -- confirm.
 */
93 mod = (mod+cols+mod)*4;
/*
 * The visible template fragments widen bytes with punpcklbw, accumulate with
 * saturating signed word adds (paddsw), narrow back to unsigned bytes with
 * saturation (packuswb), and interleave the results with punpck{l,h}{bw,wd,dq}.
 * "movq (%2,%4)" reads 8 bytes at operand 2 plus operand 4.
 */
95 __asm__ __volatile__ (
103 "punpcklbw %%mm7,%%mm1\n" 104 "punpckldq %%mm1,%%mm1\n" 116 "movq (%2,%4),%%mm3\n" 117 "punpckldq %%mm3,%%mm2\n" 126 "paddsw %%mm1, %%mm5\n" 127 "paddsw %%mm1, %%mm6\n" 128 "packuswb %%mm5,%%mm5\n" 129 "packuswb %%mm6,%%mm6\n" 131 "punpcklbw %%mm5,%%mm6\n" 135 "punpcklbw %%mm7,%%mm1\n" 136 "punpckldq %%mm1,%%mm1\n" 149 "paddsw %%mm5, %%mm3\n" 150 "paddsw %%mm5, %%mm7\n" 151 "paddsw %%mm0, %%mm3\n" 152 "paddsw %%mm0, %%mm7\n" 153 "packuswb %%mm3,%%mm3\n" 154 "packuswb %%mm7,%%mm7\n" 155 "punpcklbw %%mm3,%%mm7\n" 160 "paddsw %%mm1, %%mm3\n" 161 "paddsw %%mm1, %%mm5\n" 162 "packuswb %%mm3,%%mm3\n" 163 "packuswb %%mm5,%%mm5\n" 164 "punpcklbw %%mm3,%%mm5\n" 174 "punpcklbw %%mm4,%%mm1\n" 175 "punpcklbw %%mm4,%%mm3\n" 178 "punpcklwd %%mm1,%%mm3\n" 179 "punpckhwd %%mm2,%%mm0\n" 183 "punpcklbw %%mm1,%%mm2\n" 184 "punpcklwd %%mm4,%%mm2\n" 189 "punpcklbw %%mm1,%%mm4\n" 190 "punpckhwd %%mm2,%%mm4\n" 197 "punpckhbw %%mm2,%%mm6\n" 198 "punpckhbw %%mm1,%%mm5\n" 200 "punpcklwd %%mm6,%%mm1\n" 202 "punpckhwd %%mm6,%%mm5\n" 224 :
/*
 * Operand list, in order.  Assuming no output operands precede it (only one
 * ':' is visible in this excerpt -- confirm), the numbering is:
 *   %0 cr, %1 cb, %2 lum, %3 row1, %4 cols, %5 row2   (registers)
 *   %6 x,  %7 y,  %8 mod                              (memory)
 *   %9 and up: the MMX constant tables                (memory)
 * Under that numbering "(%2,%4)" addresses lum + cols.
 */
"r" (cr),
"r"(cb),
"r"(lum),
225 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
226 "m"(MMX_0080w),
"m"(MMX_VgrnRGB),
"m"(MMX_VredRGB),
227 "m"(MMX_FF00w),
"m"(MMX_00FFw),
"m"(MMX_UgrnRGB),
/*
 * Color565DitherYV12MMX1X: 16-bit counterpart of the routine above.  It
 * reads the lum, cr and cb byte planes and stores 8-byte groups through two
 * Uint16 row pointers derived from out.
 *
 * Parameters as declared below:
 *   colortab, rgb_2_pix - lookup tables; neither appears in the asm operand
 *                         list shown here.
 *   lum, cr, cb         - input byte planes, passed to the asm in registers.
 *   out                 - output buffer, addressed as Uint16 elements.
 *   rows, cols          - image dimensions; cols is also an asm operand.
 *   mod                 - extra Uint16 elements between the end of one output
 *                         row and the start of the next (presumably; it is
 *                         added to cols when locating the second row).
 *
 * NOTE(review): this excerpt is incomplete -- the opening brace, the
 * declarations of row1 and x, parts of the asm template and the closing
 * of the statement and function are on lines that are not shown here.
 */
232 void Color565DitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
233 unsigned char *lum,
unsigned char *cr,
234 unsigned char *cb,
unsigned char *out,
235 int rows,
int cols,
int mod )
/* y is lum advanced by cols*rows bytes; it is handed to the asm as a memory operand. */
240 unsigned char* y = lum +cols*rows;
/* row2 is out viewed as Uint16 and advanced by cols+mod elements. */
243 row2 = (
Uint16 *)out+cols+mod;
/*
 * mod is rescaled to (2*mod + cols) * 2; the factor 2 is presumably
 * sizeof(Uint16), turning an element count into a byte step -- confirm.
 */
244 mod = (mod+cols+mod)*2;
/*
 * Visible template fragments, using the operand numbering noted at the
 * operand list below:
 *   - pxor clears mm7, which punpcklbw then uses to zero-extend the bytes in
 *     mm0 and mm1 to 16-bit words;
 *   - copies of mm0/mm1 are multiplied (pmullw) by constants %10, %11, %13
 *     and %14, and mm6/mm7 by %15;
 *   - "movq (%2, %4)" reads 8 bytes at operand 2 plus operand 4;
 *   - sums are narrowed with packuswb (unsigned saturation) and interleaved
 *     with punpcklbw / punpck{l,h}wd;
 *   - "movq %%mm5, 8(%3)" and "movq %%mm5, 8(%5)" store 8 bytes at byte
 *     offset 8 from operands 3 and 5.
 */
246 __asm__ __volatile__(
251 "pxor %%mm7, %%mm7\n" 254 "punpcklbw %%mm7, %%mm0\n" 255 "punpcklbw %%mm7, %%mm1\n" 258 "movq %%mm0, %%mm2\n" 259 "movq %%mm1, %%mm3\n" 260 "pmullw %10, %%mm2\n" 262 "pmullw %11, %%mm0\n" 264 "pmullw %13, %%mm3\n" 266 "pmullw %14, %%mm1\n" 268 "pmullw %15, %%mm6\n" 269 "paddw %%mm3, %%mm2\n" 270 "pmullw %15, %%mm7\n" 272 "movq %%mm6, %%mm4\n" 273 "paddw %%mm0, %%mm6\n" 274 "movq %%mm4, %%mm5\n" 275 "paddw %%mm1, %%mm4\n" 276 "paddw %%mm2, %%mm5\n" 278 "movq %%mm7, %%mm3\n" 280 "paddw %%mm0, %%mm7\n" 282 "packuswb %%mm4, %%mm4\n" 283 "packuswb %%mm5, %%mm5\n" 284 "packuswb %%mm6, %%mm6\n" 285 "punpcklbw %%mm4, %%mm4\n" 286 "punpcklbw %%mm5, %%mm5\n" 290 "punpcklbw %%mm6, %%mm6\n" 295 "movq %%mm3, %%mm5\n" 296 "paddw %%mm1, %%mm3\n" 297 "paddw %%mm2, %%mm5\n" 301 "movq (%2, %4), %%mm6\n" 303 "packuswb %%mm3, %%mm3\n" 304 "packuswb %%mm5, %%mm5\n" 305 "packuswb %%mm7, %%mm7\n" 307 "punpcklbw %%mm3, %%mm3\n" 308 "punpcklbw %%mm5, %%mm5\n" 309 "pmullw %15, %%mm6\n" 310 "punpcklbw %%mm7, %%mm7\n" 317 "movq (%2,%4), %%mm7\n" 320 "movq %%mm4, %%mm5\n" 321 "punpcklwd %%mm3, %%mm4\n" 322 "pmullw %15, %%mm7\n" 323 "punpckhwd %%mm3, %%mm5\n" 326 "movq %%mm5, 8(%3)\n" 328 "movq %%mm6, %%mm4\n" 329 "paddw %%mm0, %%mm6\n" 331 "movq %%mm4, %%mm5\n" 332 "paddw %%mm1, %%mm4\n" 333 "paddw %%mm2, %%mm5\n" 335 "movq %%mm7, %%mm3\n" 337 "paddw %%mm0, %%mm7\n" 339 "movq %%mm3, %%mm0\n" 340 "packuswb %%mm4, %%mm4\n" 341 "paddw %%mm1, %%mm3\n" 342 "packuswb %%mm5, %%mm5\n" 343 "paddw %%mm2, %%mm0\n" 344 "packuswb %%mm6, %%mm6\n" 345 "punpcklbw %%mm4, %%mm4\n" 346 "punpcklbw %%mm5, %%mm5\n" 347 "punpcklbw %%mm6, %%mm6\n" 358 "packuswb %%mm3, %%mm3\n" 359 "packuswb %%mm0, %%mm0\n" 360 "packuswb %%mm7, %%mm7\n" 361 "punpcklbw %%mm3, %%mm3\n" 362 "punpcklbw %%mm0, %%mm0\n" 363 "punpcklbw %%mm7, %%mm7\n" 372 "movq %%mm4, %%mm5\n" 374 "punpcklwd %%mm3, %%mm4\n" 375 "punpckhwd %%mm3, %%mm5\n" 378 "movq %%mm5, 8(%5)\n" 397 :
/*
 * Operand list, in order.  Assuming no output operands precede it (only one
 * ':' is visible in this excerpt -- confirm), the numbering is:
 *   %0 cr, %1 cb, %2 lum, %3 row1, %4 cols, %5 row2   (registers)
 *   %6 x,  %7 y,  %8 mod                              (memory)
 *   %9 MMX_0080w,  %10 MMX_Ugrn565, %11 MMX_Ublu5x5, %12 MMX_00FFw,
 *   %13 MMX_Vgrn565, %14 MMX_Vred5x5, %15 MMX_Ycoeff,
 *   %16 MMX_red565, %17 MMX_grn565                    (memory)
 * Under that numbering "(%2,%4)" addresses lum + cols, and the 8(%3) / 8(%5)
 * stores land in row1 and row2.
 */
"r" (cr),
"r"(cb),
"r"(lum),
398 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
399 "m"(MMX_0080w),
"m"(MMX_Ugrn565),
"m"(MMX_Ublu5x5),
400 "m"(MMX_00FFw),
"m"(MMX_Vgrn565),
"m"(MMX_Vred5x5),
401 "m"(MMX_Ycoeff),
"m"(MMX_red565),
"m"(MMX_grn565)
GLint GLint GLint GLint GLint x
uint32_t Uint32
An unsigned 32-bit integer type.
GLint GLint GLint GLint GLint GLint y
uint16_t Uint16
An unsigned 16-bit integer type.