21 #include "../SDL_internal.h" 45 const unsigned A = info->
a;
60 if ( palmap ==
NULL ) {
61 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
63 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
90 unsigned sR, sG, sB, sA;
106 if ( palmap ==
NULL ) {
107 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
109 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
139 const unsigned A = info->
a;
146 if ( Pixel != ckey ) {
155 if ( palmap ==
NULL ) {
156 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
158 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
185 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
187 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe);
188 lmask = _mm_set_pi32(0x00010101, 0x00010101);
189 dsta = _mm_set_pi32(dalpha, dalpha);
196 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197 + (s & d & 0x00010101)) | dalpha;
201 for (n >>= 1; n > 0; --
n) {
202 dst1 = *(__m64 *) dstp;
205 src1 = *(__m64 *) srcp;
208 dst2 = _mm_and_si64(dst2, hmask);
209 src2 = _mm_and_si64(src2, hmask);
210 src2 = _mm_add_pi32(src2, dst2);
211 src2 = _mm_srli_pi32(src2, 1);
213 dst1 = _mm_and_si64(dst1, src1);
214 dst1 = _mm_and_si64(dst1, lmask);
215 dst1 = _mm_add_pi32(dst1, src2);
216 dst1 = _mm_or_si64(dst1, dsta);
218 *(__m64 *) dstp = dst1;
237 if (alpha == 128 && (df->
Rmask | df->
Gmask | df->
Bmask) == 0x00FFFFFF) {
239 BlitRGBtoRGBSurfaceAlpha128MMX(info);
250 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
252 mm_zero = _mm_setzero_si64();
254 amult = alpha | (alpha << 8);
255 amult = amult | (amult << 16);
257 (0xff << df->
Rshift) | (0xff << df->
258 Gshift) | (0xff << df->
Bshift);
259 mm_alpha = _mm_set_pi32(0, amult & chanmask);
260 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero);
262 dsta = _mm_set_pi32(dalpha, dalpha);
268 src2 = _mm_cvtsi32_si64(*srcp);
269 src2 = _mm_unpacklo_pi8(src2, mm_zero);
271 dst1 = _mm_cvtsi32_si64(*dstp);
272 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
274 src2 = _mm_sub_pi16(src2, dst1);
275 src2 = _mm_mullo_pi16(src2, mm_alpha);
276 src2 = _mm_srli_pi16(src2, 8);
277 dst1 = _mm_add_pi8(src2, dst1);
279 dst1 = _mm_packs_pu16(dst1, mm_zero);
280 dst1 = _mm_or_si64(dst1, dsta);
281 *dstp = _mm_cvtsi64_si32(dst1);
289 for (n >>= 1; n > 0; --
n) {
291 src1 = *(__m64 *) srcp;
293 src1 = _mm_unpacklo_pi8(src1, mm_zero);
294 src2 = _mm_unpackhi_pi8(src2, mm_zero);
296 dst1 = *(__m64 *) dstp;
298 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
299 dst2 = _mm_unpackhi_pi8(dst2, mm_zero);
301 src1 = _mm_sub_pi16(src1, dst1);
302 src1 = _mm_mullo_pi16(src1, mm_alpha);
303 src1 = _mm_srli_pi16(src1, 8);
304 dst1 = _mm_add_pi8(src1, dst1);
306 src2 = _mm_sub_pi16(src2, dst2);
307 src2 = _mm_mullo_pi16(src2, mm_alpha);
308 src2 = _mm_srli_pi16(src2, 8);
309 dst2 = _mm_add_pi8(src2, dst2);
311 dst1 = _mm_packs_pu16(dst1, dst2);
312 dst1 = _mm_or_si64(dst1, dsta);
314 *(__m64 *) dstp = dst1;
339 Uint64 multmask, multmask2;
341 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
343 mm_zero = _mm_setzero_si64();
345 multmask <<= (ashift * 2);
346 multmask2 = 0x00FF00FF00FF00FFULL;
354 }
else if (alpha == amask) {
357 src1 = _mm_cvtsi32_si64(*srcp);
358 src1 = _mm_unpacklo_pi8(src1, mm_zero);
360 dst1 = _mm_cvtsi32_si64(*dstp);
361 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
363 mm_alpha = _mm_cvtsi32_si64(alpha);
364 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
365 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
366 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
367 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
368 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
371 src1 = _mm_mullo_pi16(src1, mm_alpha);
372 src1 = _mm_srli_pi16(src1, 8);
373 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
374 dst1 = _mm_srli_pi16(dst1, 8);
375 dst1 = _mm_add_pi16(src1, dst1);
376 dst1 = _mm_packs_pu16(dst1, mm_zero);
378 *dstp = _mm_cvtsi64_si32(dst1);
408 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
409 + (s & d & 0x00010101)) | 0xff000000;
443 d1 = (d1 + ((s1 - d1) * alpha >> 8))
447 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
448 *dstp = d1 | d | 0xff000000;
495 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
498 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
499 dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
500 *dstp = d1 | d | (dalpha << 24);
526 Uint64 multmask, multmask2;
528 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
530 mm_zero = _mm_setzero_si64();
532 multmask <<= (ashift * 2);
533 multmask2 = 0x00FF00FF00FF00FFULL;
540 _m_prefetch(srcp + 16);
541 _m_prefetch(dstp + 16);
543 alpha = *srcp & amask;
546 }
else if (alpha == amask) {
549 src1 = _mm_cvtsi32_si64(*srcp);
550 src1 = _mm_unpacklo_pi8(src1, mm_zero);
552 dst1 = _mm_cvtsi32_si64(*dstp);
553 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
555 mm_alpha = _mm_cvtsi32_si64(alpha);
556 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
557 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
558 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
559 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
560 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
564 src1 = _mm_mullo_pi16(src1, mm_alpha);
565 src1 = _mm_srli_pi16(src1, 8);
566 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
567 dst1 = _mm_srli_pi16(dst1, 8);
568 dst1 = _mm_add_pi16(src1, dst1);
569 dst1 = _mm_packs_pu16(dst1, mm_zero);
571 *dstp = _mm_cvtsi64_si32(dst1);
588 #define BLEND16_50(d, s, mask) \ 589 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) 592 #define BLEND2x16_50(d, s, mask) \ 593 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 594 + (s & d & (~(mask | mask << 16)))) 627 prev_sw = ((
Uint32 *) srcp)[-1];
633 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 634 s = (prev_sw << 16) + (sw >> 16);
636 s = (prev_sw >> 16) + (sw << 16);
648 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 651 s = (
Uint16) (prev_sw >> 16);
713 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
715 alpha &= ~(1 + 2 + 4);
716 mm_alpha = _mm_set_pi32(0, alpha);
719 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
720 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
723 mm_alpha = _mm_slli_si64(mm_alpha, 3);
726 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0);
727 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
740 s = (s | s << 16) & 0x07e0f81f;
741 d = (d | d << 16) & 0x07e0f81f;
742 d += (s -
d) * alpha >> 5;
744 *dstp++ = (
Uint16)(d | d >> 16);
753 s = (s | s << 16) & 0x07e0f81f;
754 d = (d | d << 16) & 0x07e0f81f;
755 d += (s -
d) * alpha >> 5;
757 *dstp++ = (
Uint16)(d | d >> 16);
765 s = (s | s << 16) & 0x07e0f81f;
766 d = (d | d << 16) & 0x07e0f81f;
767 d += (s -
d) * alpha >> 5;
769 *dstp++ = (
Uint16)(d | d >> 16);
771 src1 = *(__m64*)srcp;
772 dst1 = *(__m64*)dstp;
776 src2 = _mm_srli_pi16(src2, 11);
779 dst2 = _mm_srli_pi16(dst2, 11);
782 src2 = _mm_sub_pi16(src2, dst2);
783 src2 = _mm_mullo_pi16(src2, mm_alpha);
784 src2 = _mm_srli_pi16(src2, 11);
785 dst2 = _mm_add_pi16(src2, dst2);
786 dst2 = _mm_slli_pi16(dst2, 11);
792 src2 = _mm_and_si64(src2, gmask);
795 dst2 = _mm_and_si64(dst2, gmask);
798 src2 = _mm_sub_pi16(src2, dst2);
799 src2 = _mm_mulhi_pi16(src2, mm_alpha);
800 src2 = _mm_slli_pi16(src2, 5);
801 dst2 = _mm_add_pi16(src2, dst2);
803 mm_res = _mm_or_si64(mm_res, dst2);
807 src2 = _mm_and_si64(src2, bmask);
810 dst2 = _mm_and_si64(dst2, bmask);
813 src2 = _mm_sub_pi16(src2, dst2);
814 src2 = _mm_mullo_pi16(src2, mm_alpha);
815 src2 = _mm_srli_pi16(src2, 11);
816 dst2 = _mm_add_pi16(src2, dst2);
817 dst2 = _mm_and_si64(dst2, bmask);
819 mm_res = _mm_or_si64(mm_res, dst2);
821 *(__m64*)dstp = mm_res;
850 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
852 alpha &= ~(1 + 2 + 4);
853 mm_alpha = _mm_set_pi32(0, alpha);
856 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
857 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
860 mm_alpha = _mm_slli_si64(mm_alpha, 3);
863 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00);
864 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0);
865 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
878 s = (s | s << 16) & 0x03e07c1f;
879 d = (d | d << 16) & 0x03e07c1f;
880 d += (s -
d) * alpha >> 5;
882 *dstp++ = (
Uint16)(d | d >> 16);
891 s = (s | s << 16) & 0x03e07c1f;
892 d = (d | d << 16) & 0x03e07c1f;
893 d += (s -
d) * alpha >> 5;
895 *dstp++ = (
Uint16)(d | d >> 16);
903 s = (s | s << 16) & 0x03e07c1f;
904 d = (d | d << 16) & 0x03e07c1f;
905 d += (s -
d) * alpha >> 5;
907 *dstp++ = (
Uint16)(d | d >> 16);
909 src1 = *(__m64*)srcp;
910 dst1 = *(__m64*)dstp;
914 src2 = _mm_and_si64(src2, rmask);
917 dst2 = _mm_and_si64(dst2, rmask);
920 src2 = _mm_sub_pi16(src2, dst2);
921 src2 = _mm_mulhi_pi16(src2, mm_alpha);
922 src2 = _mm_slli_pi16(src2, 5);
923 dst2 = _mm_add_pi16(src2, dst2);
924 dst2 = _mm_and_si64(dst2, rmask);
930 src2 = _mm_and_si64(src2, gmask);
933 dst2 = _mm_and_si64(dst2, gmask);
936 src2 = _mm_sub_pi16(src2, dst2);
937 src2 = _mm_mulhi_pi16(src2, mm_alpha);
938 src2 = _mm_slli_pi16(src2, 5);
939 dst2 = _mm_add_pi16(src2, dst2);
941 mm_res = _mm_or_si64(mm_res, dst2);
945 src2 = _mm_and_si64(src2, bmask);
948 dst2 = _mm_and_si64(dst2, bmask);
951 src2 = _mm_sub_pi16(src2, dst2);
952 src2 = _mm_mullo_pi16(src2, mm_alpha);
953 src2 = _mm_srli_pi16(src2, 11);
954 dst2 = _mm_add_pi16(src2, dst2);
955 dst2 = _mm_and_si64(dst2, bmask);
957 mm_res = _mm_or_si64(mm_res, dst2);
959 *(__m64*)dstp = mm_res;
1000 s = (s | s << 16) & 0x07e0f81f;
1001 d = (d | d << 16) & 0x07e0f81f;
1002 d += (s -
d) * alpha >> 5;
1004 *dstp++ = (
Uint16)(d | d >> 16);
1017 unsigned alpha = info->
a;
1039 s = (s | s << 16) & 0x03e07c1f;
1040 d = (d | d << 16) & 0x03e07c1f;
1041 d += (s -
d) * alpha >> 5;
1043 *dstp++ = (
Uint16)(d | d >> 16);
1067 unsigned alpha = s >> 27;
1074 *dstp = (
Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1081 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1083 d = (d | d << 16) & 0x07e0f81f;
1084 d += (s -
d) * alpha >> 5;
1086 *dstp = (
Uint16)(d | d >> 16);
1121 *dstp = (
Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1128 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1130 d = (d | d << 16) & 0x03e07c1f;
1131 d += (s -
d) * alpha >> 5;
1133 *dstp = (
Uint16)(d | d >> 16);
1160 unsigned sR, sG, sB;
1161 unsigned dR, dG, dB, dA;
1162 const unsigned sA = info->
a;
1200 unsigned sR, sG, sB;
1201 unsigned dR, dG, dB, dA;
1202 const unsigned sA = info->
a;
1209 if(sA && Pixel != ckey) {
1240 unsigned sR, sG, sB, sA;
1241 unsigned dR, dG, dB, dA;
1283 && sf->
Gmask == 0xff00
1285 || (sf->
Bmask == 0xff && df->
Bmask == 0x1f))) {
1286 if (df->
Gmask == 0x7e0)
1288 else if (df->
Gmask == 0x3e0)
1297 #if defined(__MMX__) || defined(__3dNOW__) 1304 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1308 return BlitRGBtoRGBPixelAlphaMMX;
1312 if (sf->
Amask == 0xff000000) {
1325 if (sf->
Amask == 0) {
1333 if (df->
Gmask == 0x7e0) {
1336 return Blit565to565SurfaceAlphaMMX;
1340 }
else if (df->
Gmask == 0x3e0) {
1343 return Blit555to555SurfaceAlphaMMX;
1359 return BlitRGBtoRGBSurfaceAlphaMMX;
1375 if (sf->
Amask == 0) {
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
#define BLEND16_50(d, s, mask)
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
#define SDL_COPY_COLORKEY
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
SDL_PixelFormat * src_fmt
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
A collection of pixels used in software blitting.
#define SDL_COPY_RLE_MASK
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
GLfloat GLfloat GLfloat alpha
GLint GLint GLsizei width
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
#define DUFFS_LOOP4(pixel_copy_increment, width)
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
#define DUFFS_LOOP(pixel_copy_increment, width)
GLubyte GLubyte GLubyte GLubyte w
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
SDL_PixelFormat * dst_fmt
GLint GLint GLsizei GLsizei height
#define SDL_COPY_MODULATE_ALPHA
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
void(* SDL_BlitFunc)(SDL_BlitInfo *info)
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width)
#define BLEND2x16_50(d, s, mask)