SDL  2.0
SDL_blit_A.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22 
23 #include "SDL_video.h"
24 #include "SDL_blit.h"
25 
26 /* Functions to perform alpha blended blitting */
27 
28 /* N->1 blending with per-surface alpha */
29 static void
31 {
32  int width = info->dst_w;
33  int height = info->dst_h;
34  Uint8 *src = info->src;
35  int srcskip = info->src_skip;
36  Uint8 *dst = info->dst;
37  int dstskip = info->dst_skip;
38  Uint8 *palmap = info->table;
39  SDL_PixelFormat *srcfmt = info->src_fmt;
40  SDL_PixelFormat *dstfmt = info->dst_fmt;
41  int srcbpp = srcfmt->BytesPerPixel;
42  Uint32 Pixel;
43  unsigned sR, sG, sB;
44  unsigned dR, dG, dB;
45  const unsigned A = info->a;
46 
47  while (height--) {
48  /* *INDENT-OFF* */
50  {
51  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
52  dR = dstfmt->palette->colors[*dst].r;
53  dG = dstfmt->palette->colors[*dst].g;
54  dB = dstfmt->palette->colors[*dst].b;
55  ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
56  dR &= 0xff;
57  dG &= 0xff;
58  dB &= 0xff;
59  /* Pack RGB into 8bit pixel */
60  if ( palmap == NULL ) {
61  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
62  } else {
63  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
64  }
65  dst++;
66  src += srcbpp;
67  },
68  width);
69  /* *INDENT-ON* */
70  src += srcskip;
71  dst += dstskip;
72  }
73 }
74 
75 /* N->1 blending with pixel alpha */
76 static void
78 {
79  int width = info->dst_w;
80  int height = info->dst_h;
81  Uint8 *src = info->src;
82  int srcskip = info->src_skip;
83  Uint8 *dst = info->dst;
84  int dstskip = info->dst_skip;
85  Uint8 *palmap = info->table;
86  SDL_PixelFormat *srcfmt = info->src_fmt;
87  SDL_PixelFormat *dstfmt = info->dst_fmt;
88  int srcbpp = srcfmt->BytesPerPixel;
89  Uint32 Pixel;
90  unsigned sR, sG, sB, sA;
91  unsigned dR, dG, dB;
92 
93  while (height--) {
94  /* *INDENT-OFF* */
96  {
97  DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
98  dR = dstfmt->palette->colors[*dst].r;
99  dG = dstfmt->palette->colors[*dst].g;
100  dB = dstfmt->palette->colors[*dst].b;
101  ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB);
102  dR &= 0xff;
103  dG &= 0xff;
104  dB &= 0xff;
105  /* Pack RGB into 8bit pixel */
106  if ( palmap == NULL ) {
107  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
108  } else {
109  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
110  }
111  dst++;
112  src += srcbpp;
113  },
114  width);
115  /* *INDENT-ON* */
116  src += srcskip;
117  dst += dstskip;
118  }
119 }
120 
121 /* colorkeyed N->1 blending with per-surface alpha */
122 static void
124 {
125  int width = info->dst_w;
126  int height = info->dst_h;
127  Uint8 *src = info->src;
128  int srcskip = info->src_skip;
129  Uint8 *dst = info->dst;
130  int dstskip = info->dst_skip;
131  Uint8 *palmap = info->table;
132  SDL_PixelFormat *srcfmt = info->src_fmt;
133  SDL_PixelFormat *dstfmt = info->dst_fmt;
134  int srcbpp = srcfmt->BytesPerPixel;
135  Uint32 ckey = info->colorkey;
136  Uint32 Pixel;
137  unsigned sR, sG, sB;
138  unsigned dR, dG, dB;
139  const unsigned A = info->a;
140 
141  while (height--) {
142  /* *INDENT-OFF* */
143  DUFFS_LOOP(
144  {
145  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
146  if ( Pixel != ckey ) {
147  dR = dstfmt->palette->colors[*dst].r;
148  dG = dstfmt->palette->colors[*dst].g;
149  dB = dstfmt->palette->colors[*dst].b;
150  ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
151  dR &= 0xff;
152  dG &= 0xff;
153  dB &= 0xff;
154  /* Pack RGB into 8bit pixel */
155  if ( palmap == NULL ) {
156  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
157  } else {
158  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
159  }
160  }
161  dst++;
162  src += srcbpp;
163  },
164  width);
165  /* *INDENT-ON* */
166  src += srcskip;
167  dst += dstskip;
168  }
169 }
170 
171 #ifdef __MMX__
172 
173 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
174 static void
175 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
176 {
177  int width = info->dst_w;
178  int height = info->dst_h;
179  Uint32 *srcp = (Uint32 *) info->src;
180  int srcskip = info->src_skip >> 2;
181  Uint32 *dstp = (Uint32 *) info->dst;
182  int dstskip = info->dst_skip >> 2;
183  Uint32 dalpha = info->dst_fmt->Amask;
184 
185  __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
186 
187  hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
188  lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
189  dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
190 
191  while (height--) {
192  int n = width;
193  if (n & 1) {
194  Uint32 s = *srcp++;
195  Uint32 d = *dstp;
196  *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197  + (s & d & 0x00010101)) | dalpha;
198  n--;
199  }
200 
201  for (n >>= 1; n > 0; --n) {
202  dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
203  dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
204 
205  src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
206  src2 = src1; /* 2 x src -> src2(ARGBARGB) */
207 
208  dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
209  src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
210  src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
211  src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
212 
213  dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
214  dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
215  dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
216  dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
217 
218  *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
219  dstp += 2;
220  srcp += 2;
221  }
222 
223  srcp += srcskip;
224  dstp += dstskip;
225  }
226  _mm_empty();
227 }
228 
229 /* fast RGB888->(A)RGB888 blending with surface alpha */
230 static void
231 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
232 {
233  SDL_PixelFormat *df = info->dst_fmt;
234  Uint32 chanmask;
235  unsigned alpha = info->a;
236 
237  if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
238  /* only call a128 version when R,G,B occupy lower bits */
239  BlitRGBtoRGBSurfaceAlpha128MMX(info);
240  } else {
241  int width = info->dst_w;
242  int height = info->dst_h;
243  Uint32 *srcp = (Uint32 *) info->src;
244  int srcskip = info->src_skip >> 2;
245  Uint32 *dstp = (Uint32 *) info->dst;
246  int dstskip = info->dst_skip >> 2;
247  Uint32 dalpha = df->Amask;
248  Uint32 amult;
249 
250  __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
251 
252  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
253  /* form the alpha mult */
254  amult = alpha | (alpha << 8);
255  amult = amult | (amult << 16);
256  chanmask =
257  (0xff << df->Rshift) | (0xff << df->
258  Gshift) | (0xff << df->Bshift);
259  mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
260  mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
261  /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
262  dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
263 
264  while (height--) {
265  int n = width;
266  if (n & 1) {
267  /* One Pixel Blend */
268  src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
269  src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
270 
271  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
272  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
273 
274  src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
275  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
276  src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
277  dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
278 
279  dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
280  dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
281  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
282 
283  ++srcp;
284  ++dstp;
285 
286  n--;
287  }
288 
289  for (n >>= 1; n > 0; --n) {
290  /* Two Pixels Blend */
291  src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
292  src2 = src1; /* 2 x src -> src2(ARGBARGB) */
293  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
294  src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
295 
296  dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
297  dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
298  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
299  dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
300 
301  src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
302  src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
303  src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
304  dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
305 
306  src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
307  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
308  src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
309  dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
310 
311  dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
312  dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
313 
314  *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
315 
316  srcp += 2;
317  dstp += 2;
318  }
319  srcp += srcskip;
320  dstp += dstskip;
321  }
322  _mm_empty();
323  }
324 }
325 
326 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
327 static void
328 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
329 {
330  int width = info->dst_w;
331  int height = info->dst_h;
332  Uint32 *srcp = (Uint32 *) info->src;
333  int srcskip = info->src_skip >> 2;
334  Uint32 *dstp = (Uint32 *) info->dst;
335  int dstskip = info->dst_skip >> 2;
336  SDL_PixelFormat *sf = info->src_fmt;
337  Uint32 amask = sf->Amask;
338  Uint32 ashift = sf->Ashift;
339  Uint64 multmask, multmask2;
340 
341  __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
342 
343  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
344  multmask = 0x00FF;
345  multmask <<= (ashift * 2);
346  multmask2 = 0x00FF00FF00FF00FFULL;
347 
348  while (height--) {
349  /* *INDENT-OFF* */
350  DUFFS_LOOP4({
351  Uint32 alpha = *srcp & amask;
352  if (alpha == 0) {
353  /* do nothing */
354  } else if (alpha == amask) {
355  *dstp = *srcp;
356  } else {
357  src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
358  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
359 
360  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
361  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
362 
363  mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
364  mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
365  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
366  mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
367  mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */
368  mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */
369 
370  /* blend */
371  src1 = _mm_mullo_pi16(src1, mm_alpha);
372  src1 = _mm_srli_pi16(src1, 8);
373  dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
374  dst1 = _mm_srli_pi16(dst1, 8);
375  dst1 = _mm_add_pi16(src1, dst1);
376  dst1 = _mm_packs_pu16(dst1, mm_zero);
377 
378  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
379  }
380  ++srcp;
381  ++dstp;
382  }, width);
383  /* *INDENT-ON* */
384  srcp += srcskip;
385  dstp += dstskip;
386  }
387  _mm_empty();
388 }
389 
390 #endif /* __MMX__ */
391 
392 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
393 static void
395 {
396  int width = info->dst_w;
397  int height = info->dst_h;
398  Uint32 *srcp = (Uint32 *) info->src;
399  int srcskip = info->src_skip >> 2;
400  Uint32 *dstp = (Uint32 *) info->dst;
401  int dstskip = info->dst_skip >> 2;
402 
403  while (height--) {
404  /* *INDENT-OFF* */
405  DUFFS_LOOP4({
406  Uint32 s = *srcp++;
407  Uint32 d = *dstp;
408  *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
409  + (s & d & 0x00010101)) | 0xff000000;
410  }, width);
411  /* *INDENT-ON* */
412  srcp += srcskip;
413  dstp += dstskip;
414  }
415 }
416 
417 /* fast RGB888->(A)RGB888 blending with surface alpha */
418 static void
420 {
421  unsigned alpha = info->a;
422  if (alpha == 128) {
424  } else {
425  int width = info->dst_w;
426  int height = info->dst_h;
427  Uint32 *srcp = (Uint32 *) info->src;
428  int srcskip = info->src_skip >> 2;
429  Uint32 *dstp = (Uint32 *) info->dst;
430  int dstskip = info->dst_skip >> 2;
431  Uint32 s;
432  Uint32 d;
433  Uint32 s1;
434  Uint32 d1;
435 
436  while (height--) {
437  /* *INDENT-OFF* */
438  DUFFS_LOOP4({
439  s = *srcp;
440  d = *dstp;
441  s1 = s & 0xff00ff;
442  d1 = d & 0xff00ff;
443  d1 = (d1 + ((s1 - d1) * alpha >> 8))
444  & 0xff00ff;
445  s &= 0xff00;
446  d &= 0xff00;
447  d = (d + ((s - d) * alpha >> 8)) & 0xff00;
448  *dstp = d1 | d | 0xff000000;
449  ++srcp;
450  ++dstp;
451  }, width);
452  /* *INDENT-ON* */
453  srcp += srcskip;
454  dstp += dstskip;
455  }
456  }
457 }
458 
459 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
460 static void
462 {
463  int width = info->dst_w;
464  int height = info->dst_h;
465  Uint32 *srcp = (Uint32 *) info->src;
466  int srcskip = info->src_skip >> 2;
467  Uint32 *dstp = (Uint32 *) info->dst;
468  int dstskip = info->dst_skip >> 2;
469 
470  while (height--) {
471  /* *INDENT-OFF* */
472  DUFFS_LOOP4({
473  Uint32 dalpha;
474  Uint32 d;
475  Uint32 s1;
476  Uint32 d1;
477  Uint32 s = *srcp;
478  Uint32 alpha = s >> 24;
479  /* FIXME: Here we special-case opaque alpha since the
480  compositioning used (>>8 instead of /255) doesn't handle
481  it correctly. Also special-case alpha=0 for speed?
482  Benchmark this! */
483  if (alpha) {
484  if (alpha == SDL_ALPHA_OPAQUE) {
485  *dstp = *srcp;
486  } else {
487  /*
488  * take out the middle component (green), and process
489  * the other two in parallel. One multiply less.
490  */
491  d = *dstp;
492  dalpha = d >> 24;
493  s1 = s & 0xff00ff;
494  d1 = d & 0xff00ff;
495  d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
496  s &= 0xff00;
497  d &= 0xff00;
498  d = (d + ((s - d) * alpha >> 8)) & 0xff00;
499  dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
500  *dstp = d1 | d | (dalpha << 24);
501  }
502  }
503  ++srcp;
504  ++dstp;
505  }, width);
506  /* *INDENT-ON* */
507  srcp += srcskip;
508  dstp += dstskip;
509  }
510 }
511 
512 #ifdef __3dNOW__
513 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
514 static void
515 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
516 {
517  int width = info->dst_w;
518  int height = info->dst_h;
519  Uint32 *srcp = (Uint32 *) info->src;
520  int srcskip = info->src_skip >> 2;
521  Uint32 *dstp = (Uint32 *) info->dst;
522  int dstskip = info->dst_skip >> 2;
523  SDL_PixelFormat *sf = info->src_fmt;
524  Uint32 amask = sf->Amask;
525  Uint32 ashift = sf->Ashift;
526  Uint64 multmask, multmask2;
527 
528  __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
529 
530  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
531  multmask = 0x00FF;
532  multmask <<= (ashift * 2);
533  multmask2 = 0x00FF00FF00FF00FFULL;
534 
535  while (height--) {
536  /* *INDENT-OFF* */
537  DUFFS_LOOP4({
538  Uint32 alpha;
539 
540  _m_prefetch(srcp + 16);
541  _m_prefetch(dstp + 16);
542 
543  alpha = *srcp & amask;
544  if (alpha == 0) {
545  /* do nothing */
546  } else if (alpha == amask) {
547  *dstp = *srcp;
548  } else {
549  src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
550  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
551 
552  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
553  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
554 
555  mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
556  mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
557  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
558  mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
559  mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */
560  mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */
561 
562 
563  /* blend */
564  src1 = _mm_mullo_pi16(src1, mm_alpha);
565  src1 = _mm_srli_pi16(src1, 8);
566  dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
567  dst1 = _mm_srli_pi16(dst1, 8);
568  dst1 = _mm_add_pi16(src1, dst1);
569  dst1 = _mm_packs_pu16(dst1, mm_zero);
570 
571  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
572  }
573  ++srcp;
574  ++dstp;
575  }, width);
576  /* *INDENT-ON* */
577  srcp += srcskip;
578  dstp += dstskip;
579  }
580  _mm_empty();
581 }
582 
583 #endif /* __3dNOW__ */
584 
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */

/*
 * Blend a single 16 bit pixel at 50%.
 *
 * `mask` selects the bits that take part in the halved add; the bits it
 * leaves out (~mask & 0xffff) are added back only where both pixels have
 * them set.  Arguments are parenthesised so that expressions can be passed
 * safely.
 */
#define BLEND16_50(d, s, mask)                          \
    (((((s) & (mask)) + ((d) & (mask))) >> 1) +         \
     ((s) & (d) & (~(mask) & 0xffff)))

/*
 * Blend two 16 bit pixels (packed in one 32 bit word) at 50%.
 *
 * The 16 bit `mask` is replicated into both halves.  It is widened to
 * Uint32 before the << 16 because a promoted 16 bit value such as 0xf7de
 * shifted left by 16 overflows a signed int.
 */
#define BLEND2x16_50(d, s, mask)                                            \
    ((((s) & ((Uint32)(mask) | (Uint32)(mask) << 16)) >> 1) +               \
     (((d) & ((Uint32)(mask) | (Uint32)(mask) << 16)) >> 1) +               \
     ((s) & (d) & (~((Uint32)(mask) | (Uint32)(mask) << 16))))
595 
596 static void
598 {
599  int width = info->dst_w;
600  int height = info->dst_h;
601  Uint16 *srcp = (Uint16 *) info->src;
602  int srcskip = info->src_skip >> 1;
603  Uint16 *dstp = (Uint16 *) info->dst;
604  int dstskip = info->dst_skip >> 1;
605 
606  while (height--) {
607  if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
608  /*
609  * Source and destination not aligned, pipeline it.
610  * This is mostly a win for big blits but no loss for
611  * small ones
612  */
613  Uint32 prev_sw;
614  int w = width;
615 
616  /* handle odd destination */
617  if ((uintptr_t) dstp & 2) {
618  Uint16 d = *dstp, s = *srcp;
619  *dstp = BLEND16_50(d, s, mask);
620  dstp++;
621  srcp++;
622  w--;
623  }
624  srcp++; /* srcp is now 32-bit aligned */
625 
626  /* bootstrap pipeline with first halfword */
627  prev_sw = ((Uint32 *) srcp)[-1];
628 
629  while (w > 1) {
630  Uint32 sw, dw, s;
631  sw = *(Uint32 *) srcp;
632  dw = *(Uint32 *) dstp;
633 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
634  s = (prev_sw << 16) + (sw >> 16);
635 #else
636  s = (prev_sw >> 16) + (sw << 16);
637 #endif
638  prev_sw = sw;
639  *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
640  dstp += 2;
641  srcp += 2;
642  w -= 2;
643  }
644 
645  /* final pixel if any */
646  if (w) {
647  Uint16 d = *dstp, s;
648 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
649  s = (Uint16) prev_sw;
650 #else
651  s = (Uint16) (prev_sw >> 16);
652 #endif
653  *dstp = BLEND16_50(d, s, mask);
654  srcp++;
655  dstp++;
656  }
657  srcp += srcskip - 1;
658  dstp += dstskip;
659  } else {
660  /* source and destination are aligned */
661  int w = width;
662 
663  /* first odd pixel? */
664  if ((uintptr_t) srcp & 2) {
665  Uint16 d = *dstp, s = *srcp;
666  *dstp = BLEND16_50(d, s, mask);
667  srcp++;
668  dstp++;
669  w--;
670  }
671  /* srcp and dstp are now 32-bit aligned */
672 
673  while (w > 1) {
674  Uint32 sw = *(Uint32 *) srcp;
675  Uint32 dw = *(Uint32 *) dstp;
676  *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
677  srcp += 2;
678  dstp += 2;
679  w -= 2;
680  }
681 
682  /* last odd pixel? */
683  if (w) {
684  Uint16 d = *dstp, s = *srcp;
685  *dstp = BLEND16_50(d, s, mask);
686  srcp++;
687  dstp++;
688  }
689  srcp += srcskip;
690  dstp += dstskip;
691  }
692  }
693 }
694 
695 #ifdef __MMX__
696 
697 /* fast RGB565->RGB565 blending with surface alpha */
698 static void
699 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
700 {
701  unsigned alpha = info->a;
702  if (alpha == 128) {
703  Blit16to16SurfaceAlpha128(info, 0xf7de);
704  } else {
705  int width = info->dst_w;
706  int height = info->dst_h;
707  Uint16 *srcp = (Uint16 *) info->src;
708  int srcskip = info->src_skip >> 1;
709  Uint16 *dstp = (Uint16 *) info->dst;
710  int dstskip = info->dst_skip >> 1;
711  Uint32 s, d;
712 
713  __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
714 
715  alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
716  mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
717  alpha >>= 3; /* downscale alpha to 5 bits */
718 
719  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
720  mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
721  /* position alpha to allow for mullo and mulhi on diff channels
722  to reduce the number of operations */
723  mm_alpha = _mm_slli_si64(mm_alpha, 3);
724 
725  /* Setup the 565 color channel masks */
726  gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
727  bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
728 
729  while (height--) {
730  /* *INDENT-OFF* */
732  {
733  s = *srcp++;
734  d = *dstp;
735  /*
736  * shift out the middle component (green) to
737  * the high 16 bits, and process all three RGB
738  * components at the same time.
739  */
740  s = (s | s << 16) & 0x07e0f81f;
741  d = (d | d << 16) & 0x07e0f81f;
742  d += (s - d) * alpha >> 5;
743  d &= 0x07e0f81f;
744  *dstp++ = (Uint16)(d | d >> 16);
745  },{
746  s = *srcp++;
747  d = *dstp;
748  /*
749  * shift out the middle component (green) to
750  * the high 16 bits, and process all three RGB
751  * components at the same time.
752  */
753  s = (s | s << 16) & 0x07e0f81f;
754  d = (d | d << 16) & 0x07e0f81f;
755  d += (s - d) * alpha >> 5;
756  d &= 0x07e0f81f;
757  *dstp++ = (Uint16)(d | d >> 16);
758  s = *srcp++;
759  d = *dstp;
760  /*
761  * shift out the middle component (green) to
762  * the high 16 bits, and process all three RGB
763  * components at the same time.
764  */
765  s = (s | s << 16) & 0x07e0f81f;
766  d = (d | d << 16) & 0x07e0f81f;
767  d += (s - d) * alpha >> 5;
768  d &= 0x07e0f81f;
769  *dstp++ = (Uint16)(d | d >> 16);
770  },{
771  src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
772  dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
773 
774  /* red */
775  src2 = src1;
776  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */
777 
778  dst2 = dst1;
779  dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */
780 
781  /* blend */
782  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
783  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
784  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
785  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
786  dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */
787 
788  mm_res = dst2; /* RED -> mm_res */
789 
790  /* green -- process the bits in place */
791  src2 = src1;
792  src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
793 
794  dst2 = dst1;
795  dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
796 
797  /* blend */
798  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
799  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
800  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
801  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
802 
803  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
804 
805  /* blue */
806  src2 = src1;
807  src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
808 
809  dst2 = dst1;
810  dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
811 
812  /* blend */
813  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
814  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
815  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
816  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
817  dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
818 
819  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
820 
821  *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
822 
823  srcp += 4;
824  dstp += 4;
825  }, width);
826  /* *INDENT-ON* */
827  srcp += srcskip;
828  dstp += dstskip;
829  }
830  _mm_empty();
831  }
832 }
833 
834 /* fast RGB555->RGB555 blending with surface alpha */
835 static void
836 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
837 {
838  unsigned alpha = info->a;
839  if (alpha == 128) {
840  Blit16to16SurfaceAlpha128(info, 0xfbde);
841  } else {
842  int width = info->dst_w;
843  int height = info->dst_h;
844  Uint16 *srcp = (Uint16 *) info->src;
845  int srcskip = info->src_skip >> 1;
846  Uint16 *dstp = (Uint16 *) info->dst;
847  int dstskip = info->dst_skip >> 1;
848  Uint32 s, d;
849 
850  __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
851 
852  alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
853  mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
854  alpha >>= 3; /* downscale alpha to 5 bits */
855 
856  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
857  mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
858  /* position alpha to allow for mullo and mulhi on diff channels
859  to reduce the number of operations */
860  mm_alpha = _mm_slli_si64(mm_alpha, 3);
861 
862  /* Setup the 555 color channel masks */
863  rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
864  gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
865  bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
866 
867  while (height--) {
868  /* *INDENT-OFF* */
870  {
871  s = *srcp++;
872  d = *dstp;
873  /*
874  * shift out the middle component (green) to
875  * the high 16 bits, and process all three RGB
876  * components at the same time.
877  */
878  s = (s | s << 16) & 0x03e07c1f;
879  d = (d | d << 16) & 0x03e07c1f;
880  d += (s - d) * alpha >> 5;
881  d &= 0x03e07c1f;
882  *dstp++ = (Uint16)(d | d >> 16);
883  },{
884  s = *srcp++;
885  d = *dstp;
886  /*
887  * shift out the middle component (green) to
888  * the high 16 bits, and process all three RGB
889  * components at the same time.
890  */
891  s = (s | s << 16) & 0x03e07c1f;
892  d = (d | d << 16) & 0x03e07c1f;
893  d += (s - d) * alpha >> 5;
894  d &= 0x03e07c1f;
895  *dstp++ = (Uint16)(d | d >> 16);
896  s = *srcp++;
897  d = *dstp;
898  /*
899  * shift out the middle component (green) to
900  * the high 16 bits, and process all three RGB
901  * components at the same time.
902  */
903  s = (s | s << 16) & 0x03e07c1f;
904  d = (d | d << 16) & 0x03e07c1f;
905  d += (s - d) * alpha >> 5;
906  d &= 0x03e07c1f;
907  *dstp++ = (Uint16)(d | d >> 16);
908  },{
909  src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
910  dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
911 
912  /* red -- process the bits in place */
913  src2 = src1;
914  src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */
915 
916  dst2 = dst1;
917  dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */
918 
919  /* blend */
920  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
921  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
922  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
923  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
924  dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */
925 
926  mm_res = dst2; /* RED -> mm_res */
927 
928  /* green -- process the bits in place */
929  src2 = src1;
930  src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
931 
932  dst2 = dst1;
933  dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
934 
935  /* blend */
936  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
937  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
938  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
939  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
940 
941  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
942 
943  /* blue */
944  src2 = src1; /* src -> src2 */
945  src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
946 
947  dst2 = dst1; /* dst -> dst2 */
948  dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
949 
950  /* blend */
951  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
952  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
953  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
954  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
955  dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
956 
957  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
958 
959  *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
960 
961  srcp += 4;
962  dstp += 4;
963  }, width);
964  /* *INDENT-ON* */
965  srcp += srcskip;
966  dstp += dstskip;
967  }
968  _mm_empty();
969  }
970 }
971 
972 #endif /* __MMX__ */
973 
974 /* fast RGB565->RGB565 blending with surface alpha */
975 static void
977 {
978  unsigned alpha = info->a;
979  if (alpha == 128) {
980  Blit16to16SurfaceAlpha128(info, 0xf7de);
981  } else {
982  int width = info->dst_w;
983  int height = info->dst_h;
984  Uint16 *srcp = (Uint16 *) info->src;
985  int srcskip = info->src_skip >> 1;
986  Uint16 *dstp = (Uint16 *) info->dst;
987  int dstskip = info->dst_skip >> 1;
988  alpha >>= 3; /* downscale alpha to 5 bits */
989 
990  while (height--) {
991  /* *INDENT-OFF* */
992  DUFFS_LOOP4({
993  Uint32 s = *srcp++;
994  Uint32 d = *dstp;
995  /*
996  * shift out the middle component (green) to
997  * the high 16 bits, and process all three RGB
998  * components at the same time.
999  */
1000  s = (s | s << 16) & 0x07e0f81f;
1001  d = (d | d << 16) & 0x07e0f81f;
1002  d += (s - d) * alpha >> 5;
1003  d &= 0x07e0f81f;
1004  *dstp++ = (Uint16)(d | d >> 16);
1005  }, width);
1006  /* *INDENT-ON* */
1007  srcp += srcskip;
1008  dstp += dstskip;
1009  }
1010  }
1011 }
1012 
1013 /* fast RGB555->RGB555 blending with surface alpha */
1014 static void
1016 {
1017  unsigned alpha = info->a; /* downscale alpha to 5 bits */
1018  if (alpha == 128) {
1019  Blit16to16SurfaceAlpha128(info, 0xfbde);
1020  } else {
1021  int width = info->dst_w;
1022  int height = info->dst_h;
1023  Uint16 *srcp = (Uint16 *) info->src;
1024  int srcskip = info->src_skip >> 1;
1025  Uint16 *dstp = (Uint16 *) info->dst;
1026  int dstskip = info->dst_skip >> 1;
1027  alpha >>= 3; /* downscale alpha to 5 bits */
1028 
1029  while (height--) {
1030  /* *INDENT-OFF* */
1031  DUFFS_LOOP4({
1032  Uint32 s = *srcp++;
1033  Uint32 d = *dstp;
1034  /*
1035  * shift out the middle component (green) to
1036  * the high 16 bits, and process all three RGB
1037  * components at the same time.
1038  */
1039  s = (s | s << 16) & 0x03e07c1f;
1040  d = (d | d << 16) & 0x03e07c1f;
1041  d += (s - d) * alpha >> 5;
1042  d &= 0x03e07c1f;
1043  *dstp++ = (Uint16)(d | d >> 16);
1044  }, width);
1045  /* *INDENT-ON* */
1046  srcp += srcskip;
1047  dstp += dstskip;
1048  }
1049  }
1050 }
1051 
1052 /* fast ARGB8888->RGB565 blending with pixel alpha */
1053 static void
1055 {
1056  int width = info->dst_w;
1057  int height = info->dst_h;
1058  Uint32 *srcp = (Uint32 *) info->src;
1059  int srcskip = info->src_skip >> 2;
1060  Uint16 *dstp = (Uint16 *) info->dst;
1061  int dstskip = info->dst_skip >> 1;
1062 
1063  while (height--) {
1064  /* *INDENT-OFF* */
1065  DUFFS_LOOP4({
1066  Uint32 s = *srcp;
1067  unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
1068  /* FIXME: Here we special-case opaque alpha since the
1069  compositioning used (>>8 instead of /255) doesn't handle
1070  it correctly. Also special-case alpha=0 for speed?
1071  Benchmark this! */
1072  if(alpha) {
1073  if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1074  *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1075  } else {
1076  Uint32 d = *dstp;
1077  /*
1078  * convert source and destination to G0RAB65565
1079  * and blend all components at the same time
1080  */
1081  s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1082  + (s >> 3 & 0x1f);
1083  d = (d | d << 16) & 0x07e0f81f;
1084  d += (s - d) * alpha >> 5;
1085  d &= 0x07e0f81f;
1086  *dstp = (Uint16)(d | d >> 16);
1087  }
1088  }
1089  srcp++;
1090  dstp++;
1091  }, width);
1092  /* *INDENT-ON* */
1093  srcp += srcskip;
1094  dstp += dstskip;
1095  }
1096 }
1097 
1098 /* fast ARGB8888->RGB555 blending with pixel alpha */
1099 static void
1101 {
1102  int width = info->dst_w;
1103  int height = info->dst_h;
1104  Uint32 *srcp = (Uint32 *) info->src;
1105  int srcskip = info->src_skip >> 2;
1106  Uint16 *dstp = (Uint16 *) info->dst;
1107  int dstskip = info->dst_skip >> 1;
1108 
1109  while (height--) {
1110  /* *INDENT-OFF* */
1111  DUFFS_LOOP4({
1112  unsigned alpha;
1113  Uint32 s = *srcp;
1114  alpha = s >> 27; /* downscale alpha to 5 bits */
1115  /* FIXME: Here we special-case opaque alpha since the
1116  compositioning used (>>8 instead of /255) doesn't handle
1117  it correctly. Also special-case alpha=0 for speed?
1118  Benchmark this! */
1119  if(alpha) {
1120  if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1121  *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1122  } else {
1123  Uint32 d = *dstp;
1124  /*
1125  * convert source and destination to G0RAB65565
1126  * and blend all components at the same time
1127  */
1128  s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1129  + (s >> 3 & 0x1f);
1130  d = (d | d << 16) & 0x03e07c1f;
1131  d += (s - d) * alpha >> 5;
1132  d &= 0x03e07c1f;
1133  *dstp = (Uint16)(d | d >> 16);
1134  }
1135  }
1136  srcp++;
1137  dstp++;
1138  }, width);
1139  /* *INDENT-ON* */
1140  srcp += srcskip;
1141  dstp += dstskip;
1142  }
1143 }
1144 
1145 /* General (slow) N->N blending with per-surface alpha */
1146 static void
1148 {
1149  int width = info->dst_w;
1150  int height = info->dst_h;
1151  Uint8 *src = info->src;
1152  int srcskip = info->src_skip;
1153  Uint8 *dst = info->dst;
1154  int dstskip = info->dst_skip;
1155  SDL_PixelFormat *srcfmt = info->src_fmt;
1156  SDL_PixelFormat *dstfmt = info->dst_fmt;
1157  int srcbpp = srcfmt->BytesPerPixel;
1158  int dstbpp = dstfmt->BytesPerPixel;
1159  Uint32 Pixel;
1160  unsigned sR, sG, sB;
1161  unsigned dR, dG, dB, dA;
1162  const unsigned sA = info->a;
1163 
1164  if (sA) {
1165  while (height--) {
1166  /* *INDENT-OFF* */
1167  DUFFS_LOOP4(
1168  {
1169  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
1170  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1171  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1172  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1173  src += srcbpp;
1174  dst += dstbpp;
1175  },
1176  width);
1177  /* *INDENT-ON* */
1178  src += srcskip;
1179  dst += dstskip;
1180  }
1181  }
1182 }
1183 
1184 /* General (slow) colorkeyed N->N blending with per-surface alpha */
1185 static void
1187 {
1188  int width = info->dst_w;
1189  int height = info->dst_h;
1190  Uint8 *src = info->src;
1191  int srcskip = info->src_skip;
1192  Uint8 *dst = info->dst;
1193  int dstskip = info->dst_skip;
1194  SDL_PixelFormat *srcfmt = info->src_fmt;
1195  SDL_PixelFormat *dstfmt = info->dst_fmt;
1196  Uint32 ckey = info->colorkey;
1197  int srcbpp = srcfmt->BytesPerPixel;
1198  int dstbpp = dstfmt->BytesPerPixel;
1199  Uint32 Pixel;
1200  unsigned sR, sG, sB;
1201  unsigned dR, dG, dB, dA;
1202  const unsigned sA = info->a;
1203 
1204  while (height--) {
1205  /* *INDENT-OFF* */
1206  DUFFS_LOOP4(
1207  {
1208  RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
1209  if(sA && Pixel != ckey) {
1210  RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
1211  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1212  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1213  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1214  }
1215  src += srcbpp;
1216  dst += dstbpp;
1217  },
1218  width);
1219  /* *INDENT-ON* */
1220  src += srcskip;
1221  dst += dstskip;
1222  }
1223 }
1224 
1225 /* General (slow) N->N blending with pixel alpha */
1226 static void
1228 {
1229  int width = info->dst_w;
1230  int height = info->dst_h;
1231  Uint8 *src = info->src;
1232  int srcskip = info->src_skip;
1233  Uint8 *dst = info->dst;
1234  int dstskip = info->dst_skip;
1235  SDL_PixelFormat *srcfmt = info->src_fmt;
1236  SDL_PixelFormat *dstfmt = info->dst_fmt;
1237  int srcbpp;
1238  int dstbpp;
1239  Uint32 Pixel;
1240  unsigned sR, sG, sB, sA;
1241  unsigned dR, dG, dB, dA;
1242 
1243  /* Set up some basic variables */
1244  srcbpp = srcfmt->BytesPerPixel;
1245  dstbpp = dstfmt->BytesPerPixel;
1246 
1247  while (height--) {
1248  /* *INDENT-OFF* */
1249  DUFFS_LOOP4(
1250  {
1251  DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
1252  if(sA) {
1253  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1254  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1255  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1256  }
1257  src += srcbpp;
1258  dst += dstbpp;
1259  },
1260  width);
1261  /* *INDENT-ON* */
1262  src += srcskip;
1263  dst += dstskip;
1264  }
1265 }
1266 
1267 
1270 {
1271  SDL_PixelFormat *sf = surface->format;
1272  SDL_PixelFormat *df = surface->map->dst->format;
1273 
1274  switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
1275  case SDL_COPY_BLEND:
1276  /* Per-pixel alpha blits */
1277  switch (df->BytesPerPixel) {
1278  case 1:
1279  return BlitNto1PixelAlpha;
1280 
1281  case 2:
1282  if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
1283  && sf->Gmask == 0xff00
1284  && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
1285  || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
1286  if (df->Gmask == 0x7e0)
1287  return BlitARGBto565PixelAlpha;
1288  else if (df->Gmask == 0x3e0)
1289  return BlitARGBto555PixelAlpha;
1290  }
1291  return BlitNtoNPixelAlpha;
1292 
1293  case 4:
1294  if (sf->Rmask == df->Rmask
1295  && sf->Gmask == df->Gmask
1296  && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1297 #if defined(__MMX__) || defined(__3dNOW__)
1298  if (sf->Rshift % 8 == 0
1299  && sf->Gshift % 8 == 0
1300  && sf->Bshift % 8 == 0
1301  && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
1302 #ifdef __3dNOW__
1303  if (SDL_Has3DNow())
1304  return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1305 #endif
1306 #ifdef __MMX__
1307  if (SDL_HasMMX())
1308  return BlitRGBtoRGBPixelAlphaMMX;
1309 #endif
1310  }
1311 #endif /* __MMX__ || __3dNOW__ */
1312  if (sf->Amask == 0xff000000) {
1313  return BlitRGBtoRGBPixelAlpha;
1314  }
1315  }
1316  return BlitNtoNPixelAlpha;
1317 
1318  case 3:
1319  default:
1320  break;
1321  }
1322  return BlitNtoNPixelAlpha;
1323 
1325  if (sf->Amask == 0) {
1326  /* Per-surface alpha blits */
1327  switch (df->BytesPerPixel) {
1328  case 1:
1329  return BlitNto1SurfaceAlpha;
1330 
1331  case 2:
1332  if (surface->map->identity) {
1333  if (df->Gmask == 0x7e0) {
1334 #ifdef __MMX__
1335  if (SDL_HasMMX())
1336  return Blit565to565SurfaceAlphaMMX;
1337  else
1338 #endif
1339  return Blit565to565SurfaceAlpha;
1340  } else if (df->Gmask == 0x3e0) {
1341 #ifdef __MMX__
1342  if (SDL_HasMMX())
1343  return Blit555to555SurfaceAlphaMMX;
1344  else
1345 #endif
1346  return Blit555to555SurfaceAlpha;
1347  }
1348  }
1349  return BlitNtoNSurfaceAlpha;
1350 
1351  case 4:
1352  if (sf->Rmask == df->Rmask
1353  && sf->Gmask == df->Gmask
1354  && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1355 #ifdef __MMX__
1356  if (sf->Rshift % 8 == 0
1357  && sf->Gshift % 8 == 0
1358  && sf->Bshift % 8 == 0 && SDL_HasMMX())
1359  return BlitRGBtoRGBSurfaceAlphaMMX;
1360 #endif
1361  if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
1362  return BlitRGBtoRGBSurfaceAlpha;
1363  }
1364  }
1365  return BlitNtoNSurfaceAlpha;
1366 
1367  case 3:
1368  default:
1369  return BlitNtoNSurfaceAlpha;
1370  }
1371  }
1372  break;
1373 
1375  if (sf->Amask == 0) {
1376  if (df->BytesPerPixel == 1) {
1377  return BlitNto1SurfaceAlphaKey;
1378  } else {
1379  return BlitNtoNSurfaceAlphaKey;
1380  }
1381  }
1382  break;
1383  }
1384 
1385  return NULL;
1386 }
1387 
1388 /* vi: set ts=4 sw=4 expandtab: */
Uint8 * table
Definition: SDL_blit.h:67
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
Definition: SDL_blit_A.c:1269
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1054
#define BLEND16_50(d, s, mask)
Definition: SDL_blit_A.c:588
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:30
#define SDL_Has3DNow
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1186
GLdouble s
Definition: SDL_opengl.h:2063
#define SDL_COPY_COLORKEY
Definition: SDL_blit.h:39
int src_skip
Definition: SDL_blit.h:60
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
Definition: SDL_blit.h:146
Uint8 g
Definition: SDL_pixels.h:298
GLenum GLenum dst
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
Definition: SDL_blit.h:454
Uint8 BytesPerPixel
Definition: SDL_pixels.h:320
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
Definition: SDL_blit.h:445
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:123
SDL_PixelFormat * src_fmt
Definition: SDL_blit.h:65
EGLSurface surface
Definition: eglext.h:248
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
Definition: SDL_blit.h:402
A collection of pixels used in software blitting.
Definition: SDL_surface.h:69
#define SDL_COPY_RLE_MASK
Definition: SDL_blit.h:44
Uint8 b
Definition: SDL_pixels.h:299
int dst_skip
Definition: SDL_blit.h:64
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1100
uint32_t Uint32
Definition: SDL_stdinc.h:181
GLenum src
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1227
uint64_t Uint64
Definition: SDL_stdinc.h:194
GLfloat GLfloat GLfloat alpha
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
Uint32 colorkey
Definition: SDL_blit.h:69
Uint8 * dst
Definition: SDL_blit.h:61
struct SDL_BlitMap * map
Definition: SDL_surface.h:88
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1015
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
Definition: SDL_blit.h:353
Uint8 r
Definition: SDL_pixels.h:297
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:394
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
uint8_t Uint8
Definition: SDL_stdinc.h:157
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
Definition: SDL_blit.h:122
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:976
#define DUFFS_LOOP4(pixel_copy_increment, width)
Definition: SDL_blit.h:488
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:461
GLenum GLint GLuint mask
#define DUFFS_LOOP(pixel_copy_increment, width)
Definition: SDL_blit.h:500
GLubyte GLubyte GLubyte GLubyte w
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:419
unsigned int uintptr_t
#define SDL_HasMMX
Uint8 * src
Definition: SDL_blit.h:57
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
Definition: SDL_blit_A.c:597
SDL_PixelFormat * dst_fmt
Definition: SDL_blit.h:66
SDL_Surface * dst
Definition: SDL_blit.h:88
#define NULL
Definition: begin_code.h:164
SDL_Color * colors
Definition: SDL_pixels.h:307
SDL_PixelFormat * format
Definition: SDL_surface.h:72
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
#define SDL_COPY_MODULATE_ALPHA
Definition: SDL_blit.h:35
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:77
GLdouble n
void(* SDL_BlitFunc)(SDL_BlitInfo *info)
Definition: SDL_blit.h:73
uint16_t Uint16
Definition: SDL_stdinc.h:169
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1147
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
Definition: SDL_blit.h:177
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width)
Definition: SDL_blit.h:504
SDL_Palette * palette
Definition: SDL_pixels.h:318
#define SDL_ALPHA_OPAQUE
Definition: SDL_pixels.h:46
int identity
Definition: SDL_blit.h:89
#define BLEND2x16_50(d, s, mask)
Definition: SDL_blit_A.c:592
#define SDL_COPY_BLEND
Definition: SDL_blit.h:36
SDL_BlitInfo info
Definition: SDL_blit.h:92
Uint8 a
Definition: SDL_blit.h:70