SDL  2.0
SDL_audiotypecvt.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #include "../SDL_internal.h"
23 #include "SDL_audio.h"
24 #include "SDL_audio_c.h"
25 #include "SDL_cpuinfo.h"
26 #include "SDL_assert.h"
27 
/* !!! FIXME: write NEON code. */
#define HAVE_NEON_INTRINSICS 0

/* Always define HAVE_SSE2_INTRINSICS (to 0 or 1), like HAVE_NEON_INTRINSICS,
   so the "#if HAVE_SSE2_INTRINSICS" tests below never evaluate an undefined
   identifier. */
#ifdef __SSE2__
#define HAVE_SSE2_INTRINSICS 1
#else
#define HAVE_SSE2_INTRINSICS 0
#endif

#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* x86_64 guarantees SSE2. */
#elif defined(__MACOSX__) && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* Mac OS X/Intel guarantees SSE2. */
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* ARMv8+ promise NEON. */
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* All Apple ARMv7 chips promise NEON support. */
#endif

/* Set to zero if platform is guaranteed to use a SIMD codepath here. */
#ifndef NEED_SCALAR_CONVERTER_FALLBACKS
#define NEED_SCALAR_CONVERTER_FALLBACKS 1
#endif
49 
50 /* Function pointers set to a CPU-specific implementation. */
61 
62 
/* Reciprocal scale factors for integer -> float conversion. All three are
   exact powers of two; the 32-bit one is a double because the S32 paths do
   their multiply in double precision. */
#define DIVBY128 (1.0f / 128.0f)
#define DIVBY32768 (1.0f / 32768.0f)
#define DIVBY2147483648 (1.0 / 2147483648.0)
66 
67 
68 #if NEED_SCALAR_CONVERTER_FALLBACKS
69 static void SDLCALL
71 {
72  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
73  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
74  int i;
75 
76  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
77 
78  for (i = cvt->len_cvt; i; --i, --src, --dst) {
79  *dst = ((float) *src) * DIVBY128;
80  }
81 
82  cvt->len_cvt *= 4;
83  if (cvt->filters[++cvt->filter_index]) {
84  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
85  }
86 }
87 
88 static void SDLCALL
90 {
91  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
92  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
93  int i;
94 
95  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
96 
97  for (i = cvt->len_cvt; i; --i, --src, --dst) {
98  *dst = (((float) *src) * DIVBY128) - 1.0f;
99  }
100 
101  cvt->len_cvt *= 4;
102  if (cvt->filters[++cvt->filter_index]) {
103  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
104  }
105 }
106 
107 static void SDLCALL
109 {
110  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
111  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
112  int i;
113 
114  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
115 
116  for (i = cvt->len_cvt / sizeof (Sint16); i; --i, --src, --dst) {
117  *dst = ((float) *src) * DIVBY32768;
118  }
119 
120  cvt->len_cvt *= 2;
121  if (cvt->filters[++cvt->filter_index]) {
122  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
123  }
124 }
125 
126 static void SDLCALL
128 {
129  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
130  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
131  int i;
132 
133  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
134 
135  for (i = cvt->len_cvt / sizeof (Uint16); i; --i, --src, --dst) {
136  *dst = (((float) *src) * DIVBY32768) - 1.0f;
137  }
138 
139  cvt->len_cvt *= 2;
140  if (cvt->filters[++cvt->filter_index]) {
141  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
142  }
143 }
144 
145 static void SDLCALL
147 {
148  const Sint32 *src = (const Sint32 *) cvt->buf;
149  float *dst = (float *) cvt->buf;
150  int i;
151 
152  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
153 
154  for (i = cvt->len_cvt / sizeof (Sint32); i; --i, ++src, ++dst) {
155  *dst = (float) (((double) *src) * DIVBY2147483648);
156  }
157 
158  if (cvt->filters[++cvt->filter_index]) {
159  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
160  }
161 }
162 
163 static void SDLCALL
165 {
166  const float *src = (const float *) cvt->buf;
167  Sint8 *dst = (Sint8 *) cvt->buf;
168  int i;
169 
170  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
171 
172  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
173  const float sample = *src;
174  if (sample > 1.0f) {
175  *dst = 127;
176  } else if (sample < -1.0f) {
177  *dst = -127;
178  } else {
179  *dst = (Sint8)(sample * 127.0f);
180  }
181  }
182 
183  cvt->len_cvt /= 4;
184  if (cvt->filters[++cvt->filter_index]) {
185  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
186  }
187 }
188 
189 static void SDLCALL
191 {
192  const float *src = (const float *) cvt->buf;
193  Uint8 *dst = (Uint8 *) cvt->buf;
194  int i;
195 
196  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
197 
198  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
199  const float sample = *src;
200  if (sample > 1.0f) {
201  *dst = 255;
202  } else if (sample < -1.0f) {
203  *dst = 0;
204  } else {
205  *dst = (Uint8)((sample + 1.0f) * 127.0f);
206  }
207  }
208 
209  cvt->len_cvt /= 4;
210  if (cvt->filters[++cvt->filter_index]) {
211  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
212  }
213 }
214 
215 static void SDLCALL
217 {
218  const float *src = (const float *) cvt->buf;
219  Sint16 *dst = (Sint16 *) cvt->buf;
220  int i;
221 
222  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
223 
224  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
225  const float sample = *src;
226  if (sample > 1.0f) {
227  *dst = 32767;
228  } else if (sample < -1.0f) {
229  *dst = -32767;
230  } else {
231  *dst = (Sint16)(sample * 32767.0f);
232  }
233  }
234 
235  cvt->len_cvt /= 2;
236  if (cvt->filters[++cvt->filter_index]) {
237  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
238  }
239 }
240 
241 static void SDLCALL
243 {
244  const float *src = (const float *) cvt->buf;
245  Uint16 *dst = (Uint16 *) cvt->buf;
246  int i;
247 
248  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
249 
250  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
251  const float sample = *src;
252  if (sample > 1.0f) {
253  *dst = 65534;
254  } else if (sample < -1.0f) {
255  *dst = 0;
256  } else {
257  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
258  }
259  }
260 
261  cvt->len_cvt /= 2;
262  if (cvt->filters[++cvt->filter_index]) {
263  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
264  }
265 }
266 
267 static void SDLCALL
269 {
270  const float *src = (const float *) cvt->buf;
271  Sint32 *dst = (Sint32 *) cvt->buf;
272  int i;
273 
274  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
275 
276  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
277  const float sample = *src;
278  if (sample > 1.0f) {
279  *dst = 2147483647;
280  } else if (sample < -1.0f) {
281  *dst = -2147483647;
282  } else {
283  *dst = (Sint32)((double)sample * 2147483647.0);
284  }
285  }
286 
287  if (cvt->filters[++cvt->filter_index]) {
288  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
289  }
290 }
291 #endif
292 
293 
294 #if HAVE_SSE2_INTRINSICS
295 static void SDLCALL
296 SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
297 {
298  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
299  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
300  int i;
301 
302  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
303 
304  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
305  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
306  *dst = ((float) *src) * DIVBY128;
307  }
308 
309  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
310  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
311 
312  /* Make sure src is aligned too. */
313  if ((((size_t) src) & 15) == 0) {
314  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
315  const __m128i *mmsrc = (const __m128i *) src;
316  const __m128i zero = _mm_setzero_si128();
317  const __m128 divby128 = _mm_set1_ps(DIVBY128);
318  while (i >= 16) { /* 16 * 8-bit */
319  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 sint8 into an XMM register. */
320  /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
321  const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
322  /* right-shift-sign-extend gets us sint16 with the other set of values. */
323  const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
324  /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
325  const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
326  const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
327  const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
328  const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
329  /* Interleave back into correct order, store. */
330  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
331  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
332  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
333  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
334  i -= 16; mmsrc--; dst -= 16;
335  }
336 
337  src = (const Sint8 *) mmsrc;
338  }
339 
340  src += 15; dst += 15; /* adjust for any scalar finishing. */
341 
342  /* Finish off any leftovers with scalar operations. */
343  while (i) {
344  *dst = ((float) *src) * DIVBY128;
345  i--; src--; dst--;
346  }
347 
348  cvt->len_cvt *= 4;
349  if (cvt->filters[++cvt->filter_index]) {
350  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
351  }
352 }
353 
354 static void SDLCALL
355 SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
356 {
357  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
358  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
359  int i;
360 
361  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
362 
363  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
364  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
365  *dst = (((float) *src) * DIVBY128) - 1.0f;
366  }
367 
368  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
369  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
370 
371  /* Make sure src is aligned too. */
372  if ((((size_t) src) & 15) == 0) {
373  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
374  const __m128i *mmsrc = (const __m128i *) src;
375  const __m128i zero = _mm_setzero_si128();
376  const __m128 divby128 = _mm_set1_ps(DIVBY128);
377  const __m128 minus1 = _mm_set1_ps(-1.0f);
378  while (i >= 16) { /* 16 * 8-bit */
379  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 uint8 into an XMM register. */
380  /* treat as int16, shift left to clear every other sint16, then back right with zero-extend. Now uint16. */
381  const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
382  /* right-shift-zero-extend gets us uint16 with the other set of values. */
383  const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
384  /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
385  /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
386  const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
387  const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
388  const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
389  const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
390  /* Interleave back into correct order, store. */
391  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
392  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
393  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
394  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
395  i -= 16; mmsrc--; dst -= 16;
396  }
397 
398  src = (const Uint8 *) mmsrc;
399  }
400 
401  src += 15; dst += 15; /* adjust for any scalar finishing. */
402 
403  /* Finish off any leftovers with scalar operations. */
404  while (i) {
405  *dst = (((float) *src) * DIVBY128) - 1.0f;
406  i--; src--; dst--;
407  }
408 
409  cvt->len_cvt *= 4;
410  if (cvt->filters[++cvt->filter_index]) {
411  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
412  }
413 }
414 
415 static void SDLCALL
416 SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
417 {
418  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
419  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
420  int i;
421 
422  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
423 
424  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
425  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
426  *dst = ((float) *src) * DIVBY32768;
427  }
428 
429  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
430  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
431 
432  /* Make sure src is aligned too. */
433  if ((((size_t) src) & 15) == 0) {
434  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
435  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
436  while (i >= 8) { /* 8 * 16-bit */
437  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
438  /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
439  const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
440  /* right-shift-sign-extend gets us sint32 with the other set of values. */
441  const __m128i b = _mm_srai_epi32(ints, 16);
442  /* Interleave these back into the right order, convert to float, multiply, store. */
443  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
444  _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
445  i -= 8; src -= 8; dst -= 8;
446  }
447  }
448 
449  src += 7; dst += 7; /* adjust for any scalar finishing. */
450 
451  /* Finish off any leftovers with scalar operations. */
452  while (i) {
453  *dst = ((float) *src) * DIVBY32768;
454  i--; src--; dst--;
455  }
456 
457  cvt->len_cvt *= 2;
458  if (cvt->filters[++cvt->filter_index]) {
459  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
460  }
461 }
462 
463 static void SDLCALL
464 SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
465 {
466  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
467  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
468  int i;
469 
470  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
471 
472  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
473  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
474  *dst = (((float) *src) * DIVBY32768) - 1.0f;
475  }
476 
477  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
478  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
479 
480  /* Make sure src is aligned too. */
481  if ((((size_t) src) & 15) == 0) {
482  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
483  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
484  const __m128 minus1 = _mm_set1_ps(1.0f);
485  while (i >= 8) { /* 8 * 16-bit */
486  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
487  /* treat as int32, shift left to clear every other sint16, then back right with zero-extend. Now sint32. */
488  const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
489  /* right-shift-sign-extend gets us sint32 with the other set of values. */
490  const __m128i b = _mm_srli_epi32(ints, 16);
491  /* Interleave these back into the right order, convert to float, multiply, store. */
492  _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
493  _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
494  i -= 8; src -= 8; dst -= 8;
495  }
496  }
497 
498  src += 7; dst += 7; /* adjust for any scalar finishing. */
499 
500  /* Finish off any leftovers with scalar operations. */
501  while (i) {
502  *dst = (((float) *src) * DIVBY32768) - 1.0f;
503  i--; src--; dst--;
504  }
505 
506  cvt->len_cvt *= 2;
507  if (cvt->filters[++cvt->filter_index]) {
508  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
509  }
510 }
511 
#if defined(__GNUC__) && (__GNUC__ < 4)
/* gcc only gained these two cast intrinsics in 4.0
   (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19418), so provide
   stand-ins that reinterpret the vector with a plain cast. */
static inline __m128 _mm_castsi128_ps(__m128i int_bits) {
    return (__m128) int_bits;
}
static inline __m128i _mm_castps_si128(__m128 float_bits) {
    return (__m128i) float_bits;
}
#endif
521 
522 static void SDLCALL
523 SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
524 {
525  const Sint32 *src = (const Sint32 *) cvt->buf;
526  float *dst = (float *) cvt->buf;
527  int i;
528 
529  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
530 
531  /* Get dst aligned to 16 bytes */
532  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
533  *dst = (float) (((double) *src) * DIVBY2147483648);
534  }
535 
536  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
537  SDL_assert(!i || ((((size_t) src) & 15) == 0));
538 
539  {
540  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
541  const __m128d divby2147483648 = _mm_set1_pd(DIVBY2147483648);
542  const __m128i *mmsrc = (const __m128i *) src;
543  while (i >= 4) { /* 4 * sint32 */
544  const __m128i ints = _mm_load_si128(mmsrc);
545  /* bitshift the whole register over, so _mm_cvtepi32_pd can read the top ints in the bottom of the vector. */
546  const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483648);
547  const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483648);
548  /* convert to float32, bitshift/or to get these into a vector to store. */
549  _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
550  i -= 4; mmsrc++; dst += 4;
551  }
552  src = (const Sint32 *) mmsrc;
553  }
554 
555  /* Finish off any leftovers with scalar operations. */
556  while (i) {
557  *dst = (float) (((double) *src) * DIVBY2147483648);
558  i--; src++; dst++;
559  }
560 
561  if (cvt->filters[++cvt->filter_index]) {
562  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
563  }
564 }
565 
566 static void SDLCALL
567 SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
568 {
569  const float *src = (const float *) cvt->buf;
570  Sint8 *dst = (Sint8 *) cvt->buf;
571  int i;
572 
573  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
574 
575  /* Get dst aligned to 16 bytes */
576  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
577  *dst = (Sint8) (*src * 127.0f);
578  }
579 
580  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
581 
582  /* Make sure src is aligned too. */
583  if ((((size_t) src) & 15) == 0) {
584  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
585  const __m128 mulby127 = _mm_set1_ps(127.0f);
586  __m128i *mmdst = (__m128i *) dst;
587  while (i >= 16) { /* 16 * float32 */
588  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby127)); /* load 4 floats, convert to sint32 */
589  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby127)); /* load 4 floats, convert to sint32 */
590  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+8), mulby127)); /* load 4 floats, convert to sint32 */
591  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+12), mulby127)); /* load 4 floats, convert to sint32 */
592  _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
593  i -= 16; src += 16; mmdst++;
594  }
595  dst = (Sint8 *) mmdst;
596  }
597 
598  /* Finish off any leftovers with scalar operations. */
599  while (i) {
600  *dst = (Sint8) (*src * 127.0f);
601  i--; src++; dst++;
602  }
603 
604  cvt->len_cvt /= 4;
605  if (cvt->filters[++cvt->filter_index]) {
606  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
607  }
608 }
609 
610 static void SDLCALL
611 SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
612 {
613  const float *src = (const float *) cvt->buf;
614  Uint8 *dst = (Uint8 *) cvt->buf;
615  int i;
616 
617  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
618 
619  /* Get dst aligned to 16 bytes */
620  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
621  *dst = (Uint8) ((*src + 1.0f) * 127.0f);
622  }
623 
624  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
625 
626  /* Make sure src is aligned too. */
627  if ((((size_t) src) & 15) == 0) {
628  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
629  const __m128 add1 = _mm_set1_ps(1.0f);
630  const __m128 mulby127 = _mm_set1_ps(127.0f);
631  __m128i *mmdst = (__m128i *) dst;
632  while (i >= 16) { /* 16 * float32 */
633  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src), add1), mulby127)); /* load 4 floats, convert to sint32 */
634  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+4), add1), mulby127)); /* load 4 floats, convert to sint32 */
635  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+8), add1), mulby127)); /* load 4 floats, convert to sint32 */
636  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+12), add1), mulby127)); /* load 4 floats, convert to sint32 */
637  _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
638  i -= 16; src += 16; mmdst++;
639  }
640  dst = (Uint8 *) mmdst;
641  }
642 
643  /* Finish off any leftovers with scalar operations. */
644  while (i) {
645  *dst = (Uint8) ((*src + 1.0f) * 127.0f);
646  i--; src++; dst++;
647  }
648 
649  cvt->len_cvt /= 4;
650  if (cvt->filters[++cvt->filter_index]) {
651  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
652  }
653 }
654 
655 static void SDLCALL
656 SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
657 {
658  const float *src = (const float *) cvt->buf;
659  Sint16 *dst = (Sint16 *) cvt->buf;
660  int i;
661 
662  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
663 
664  /* Get dst aligned to 16 bytes */
665  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
666  *dst = (Sint16) (*src * 32767.0f);
667  }
668 
669  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
670 
671  /* Make sure src is aligned too. */
672  if ((((size_t) src) & 15) == 0) {
673  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
674  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
675  __m128i *mmdst = (__m128i *) dst;
676  while (i >= 8) { /* 8 * float32 */
677  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767)); /* load 4 floats, convert to sint32 */
678  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767)); /* load 4 floats, convert to sint32 */
679  _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2)); /* pack to sint16, store out. */
680  i -= 8; src += 8; mmdst++;
681  }
682  dst = (Sint16 *) mmdst;
683  }
684 
685  /* Finish off any leftovers with scalar operations. */
686  while (i) {
687  *dst = (Sint16) (*src * 32767.0f);
688  i--; src++; dst++;
689  }
690 
691  cvt->len_cvt /= 2;
692  if (cvt->filters[++cvt->filter_index]) {
693  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
694  }
695 }
696 
697 static void SDLCALL
698 SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
699 {
700  const float *src = (const float *) cvt->buf;
701  Uint16 *dst = (Uint16 *) cvt->buf;
702  int i;
703 
704  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
705 
706  /* Get dst aligned to 16 bytes */
707  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
708  *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
709  }
710 
711  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
712 
713  /* Make sure src is aligned too. */
714  if ((((size_t) src) & 15) == 0) {
715  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
716  /* This calculates differently than the scalar path because SSE2 can't
717  pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
718  saturation, so that would corrupt our data. _mm_packus_epi32 exists,
719  but not before SSE 4.1. So we convert from float to sint16, packing
720  that down with legit signed saturation, and then xor the top bit
721  against 1. This results in the correct unsigned 16-bit value, even
722  though it looks like dark magic. */
723  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
724  const __m128i topbit = _mm_set1_epi16(-32768);
725  __m128i *mmdst = (__m128i *) dst;
726  while (i >= 8) { /* 8 * float32 */
727  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767)); /* load 4 floats, convert to sint32 */
728  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767)); /* load 4 floats, convert to sint32 */
729  _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit)); /* pack to sint16, xor top bit, store out. */
730  i -= 8; src += 8; mmdst++;
731  }
732  dst = (Uint16 *) mmdst;
733  }
734 
735  /* Finish off any leftovers with scalar operations. */
736  while (i) {
737  *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
738  i--; src++; dst++;
739  }
740 
741  cvt->len_cvt /= 2;
742  if (cvt->filters[++cvt->filter_index]) {
743  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
744  }
745 }
746 
747 static void SDLCALL
748 SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
749 {
750  const float *src = (const float *) cvt->buf;
751  Sint32 *dst = (Sint32 *) cvt->buf;
752  int i;
753 
754  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
755 
756  /* Get dst aligned to 16 bytes */
757  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
758  *dst = (Sint32) (((double) *src) * 2147483647.0);
759  }
760 
761  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
762  SDL_assert(!i || ((((size_t) src) & 15) == 0));
763 
764  {
765  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
766  const __m128d mulby2147483647 = _mm_set1_pd(2147483647.0);
767  __m128i *mmdst = (__m128i *) dst;
768  while (i >= 4) { /* 4 * float32 */
769  const __m128 floats = _mm_load_ps(src);
770  /* bitshift the whole register over, so _mm_cvtps_pd can read the top floats in the bottom of the vector. */
771  const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
772  const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
773  _mm_store_si128(mmdst, _mm_or_si128(_mm_slli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
774  i -= 4; src += 4; mmdst++;
775  }
776  dst = (Sint32 *) mmdst;
777  }
778 
779  /* Finish off any leftovers with scalar operations. */
780  while (i) {
781  *dst = (Sint32) (((double) *src) * 2147483647.0);
782  i--; src++; dst++;
783  }
784 
785  if (cvt->filters[++cvt->filter_index]) {
786  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
787  }
788 }
789 #endif
790 
791 
793 {
794  static SDL_bool converters_chosen = SDL_FALSE;
795 
796  if (converters_chosen) {
797  return;
798  }
799 
800 #define SET_CONVERTER_FUNCS(fntype) \
801  SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
802  SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
803  SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
804  SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
805  SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
806  SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
807  SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
808  SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
809  SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
810  SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
811  converters_chosen = SDL_TRUE
812 
813 #if HAVE_SSE2_INTRINSICS
814  if (SDL_HasSSE2()) {
815  SET_CONVERTER_FUNCS(SSE2);
816  return;
817  }
818 #endif
819 
820 #if NEED_SCALAR_CONVERTER_FALLBACKS
821  SET_CONVERTER_FUNCS(Scalar);
822 #endif
823 
824 #undef SET_CONVERTER_FUNCS
825 
826  SDL_assert(converters_chosen == SDL_TRUE);
827 }
828 
829 /* vi: set ts=4 sw=4 expandtab: */
#define LOG_DEBUG_CONVERT(from, to)
Definition: SDL_audio_c.h:34
GLenum GLenum dst
Uint8 * buf
Definition: SDL_audio.h:231
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
int filter_index
Definition: SDL_audio.h:237
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
GLfloat f
Uint16 SDL_AudioFormat
Audio format flags.
Definition: SDL_audio.h:64
GLenum src
#define AUDIO_S16SYS
Definition: SDL_audio.h:123
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
Definition: SDL_audio.h:225
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: SDL_opengl.h:1572
unsigned int size_t
static const double zero
Definition: e_atan2.c:44
#define AUDIO_U8
Definition: SDL_audio.h:89
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
Definition: SDL_audio.h:236
#define AUDIO_F32SYS
Definition: SDL_audio.h:125
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int8_t Sint8
Definition: SDL_stdinc.h:151
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
uint8_t Uint8
Definition: SDL_stdinc.h:157
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int32_t Sint32
Definition: SDL_stdinc.h:175
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Definition: SDL_audio.h:192
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define SDL_assert(condition)
Definition: SDL_assert.h:169
#define NULL
Definition: begin_code.h:164
SDL_bool
Definition: SDL_stdinc.h:139
#define DIVBY2147483648
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define SDL_HasSSE2
SDL_AudioFilter SDL_Convert_S8_to_F32
#define AUDIO_S32SYS
Definition: SDL_audio.h:124
uint16_t Uint16
Definition: SDL_stdinc.h:169
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define DIVBY32768
#define DIVBY128
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
#define AUDIO_S8
Definition: SDL_audio.h:90
SDL_AudioFilter SDL_Convert_S32_to_F32
#define SDLCALL
Definition: SDL_internal.h:45
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)
#define AUDIO_U16SYS
Definition: SDL_audio.h:122
int16_t Sint16
Definition: SDL_stdinc.h:163