SDL  2.0
SDL_audiotypecvt.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #include "../SDL_internal.h"
23 #include "SDL_audio.h"
24 #include "SDL_audio_c.h"
25 #include "SDL_cpuinfo.h"
26 #include "SDL_assert.h"
27 
/* Compile-time selection of converter back ends.
   HAVE_*_INTRINSICS tells whether a SIMD back end is compiled into this file;
   NEED_SCALAR_CONVERTER_FALLBACKS tells whether the plain C converters must be
   compiled as well. */
28 /* !!! FIXME: write NEON code. */
29 #define HAVE_NEON_INTRINSICS 0
30 
/* HAVE_SSE2_INTRINSICS is deliberately left undefined when the compiler does
   not target SSE2; the "#if HAVE_SSE2_INTRINSICS" tests below then see 0. */
31 #ifdef __SSE2__
32 #define HAVE_SSE2_INTRINSICS 1
33 #endif
34 
/* Targets on which a compiled-in SIMD back end is always usable at run time,
   so the scalar converters can be left out of the build. */
35 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
36 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
37 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS
38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* Mac OS X/Intel guarantees SSE2. */
39 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* ARMv8+ promise NEON. */
41 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* All Apple ARMv7 chips promise NEON support. */
43 #endif
44 
45 /* Set to zero if platform is guaranteed to use a SIMD codepath here. */
/* Every target not matched above keeps the scalar converters. */
46 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS
47 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
48 #endif
49 
50 /* Function pointers set to a CPU-specific implementation. */
61 
62 
/* Reciprocals used to scale integer samples to floating point:
   1/128 for 8-bit, 1/32768 for 16-bit and 1/2147483648 for 32-bit samples.
   The 32-bit constant has no 'f' suffix on purpose: the S32 converters
   multiply in double precision before narrowing the result to float. */
63 #define DIVBY128 0.0078125f
64 #define DIVBY32768 0.000030517578125f
65 #define DIVBY2147483648 0.00000000046566128730773926
66 
67 
68 #if NEED_SCALAR_CONVERTER_FALLBACKS
69 static void SDLCALL
71 {
72  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
73  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
74  int i;
75 
76  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
77 
78  for (i = cvt->len_cvt; i; --i, --src, --dst) {
79  *dst = ((float) *src) * DIVBY128;
80  }
81 
82  cvt->len_cvt *= 4;
83  if (cvt->filters[++cvt->filter_index]) {
84  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
85  }
86 }
87 
88 static void SDLCALL
90 {
91  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
92  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
93  int i;
94 
95  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
96 
97  for (i = cvt->len_cvt; i; --i, --src, --dst) {
98  *dst = (((float) *src) * DIVBY128) - 1.0f;
99  }
100 
101  cvt->len_cvt *= 4;
102  if (cvt->filters[++cvt->filter_index]) {
103  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
104  }
105 }
106 
107 static void SDLCALL
109 {
110  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
111  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
112  int i;
113 
114  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
115 
116  for (i = cvt->len_cvt / sizeof (Sint16); i; --i, --src, --dst) {
117  *dst = ((float) *src) * DIVBY32768;
118  }
119 
120  cvt->len_cvt *= 2;
121  if (cvt->filters[++cvt->filter_index]) {
122  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
123  }
124 }
125 
126 static void SDLCALL
128 {
129  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
130  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
131  int i;
132 
133  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
134 
135  for (i = cvt->len_cvt / sizeof (Uint16); i; --i, --src, --dst) {
136  *dst = (((float) *src) * DIVBY32768) - 1.0f;
137  }
138 
139  cvt->len_cvt *= 2;
140  if (cvt->filters[++cvt->filter_index]) {
141  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
142  }
143 }
144 
145 static void SDLCALL
147 {
148  const Sint32 *src = (const Sint32 *) cvt->buf;
149  float *dst = (float *) cvt->buf;
150  int i;
151 
152  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
153 
154  for (i = cvt->len_cvt / sizeof (Sint32); i; --i, ++src, ++dst) {
155  *dst = (float) (((double) *src) * DIVBY2147483648);
156  }
157 
158  if (cvt->filters[++cvt->filter_index]) {
159  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
160  }
161 }
162 
163 static void SDLCALL
165 {
166  const float *src = (const float *) cvt->buf;
167  Sint8 *dst = (Sint8 *) cvt->buf;
168  int i;
169 
170  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
171 
172  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
173  const float sample = *src;
174  if (sample > 1.0f) {
175  *dst = 127;
176  } else if (sample < -1.0f) {
177  *dst = -127;
178  } else {
179  *dst = (Sint8)(sample * 127.0f);
180  }
181  }
182 
183  cvt->len_cvt /= 4;
184  if (cvt->filters[++cvt->filter_index]) {
185  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
186  }
187 }
188 
189 static void SDLCALL
191 {
192  const float *src = (const float *) cvt->buf;
193  Uint8 *dst = (Uint8 *) cvt->buf;
194  int i;
195 
196  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
197 
198  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
199  const float sample = *src;
200  if (sample > 1.0f) {
201  *dst = 255;
202  } else if (sample < -1.0f) {
203  *dst = 0;
204  } else {
205  *dst = (Uint8)((sample + 1.0f) * 127.0f);
206  }
207  }
208 
209  cvt->len_cvt /= 4;
210  if (cvt->filters[++cvt->filter_index]) {
211  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
212  }
213 }
214 
215 static void SDLCALL
217 {
218  const float *src = (const float *) cvt->buf;
219  Sint16 *dst = (Sint16 *) cvt->buf;
220  int i;
221 
222  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
223 
224  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
225  const float sample = *src;
226  if (sample > 1.0f) {
227  *dst = 32767;
228  } else if (sample < -1.0f) {
229  *dst = -32767;
230  } else {
231  *dst = (Sint16)(sample * 32767.0f);
232  }
233  }
234 
235  cvt->len_cvt /= 2;
236  if (cvt->filters[++cvt->filter_index]) {
237  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
238  }
239 }
240 
241 static void SDLCALL
243 {
244  const float *src = (const float *) cvt->buf;
245  Uint16 *dst = (Uint16 *) cvt->buf;
246  int i;
247 
248  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
249 
250  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
251  const float sample = *src;
252  if (sample > 1.0f) {
253  *dst = 65534;
254  } else if (sample < -1.0f) {
255  *dst = 0;
256  } else {
257  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
258  }
259  }
260 
261  cvt->len_cvt /= 2;
262  if (cvt->filters[++cvt->filter_index]) {
263  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
264  }
265 }
266 
267 static void SDLCALL
269 {
270  const float *src = (const float *) cvt->buf;
271  Sint32 *dst = (Sint32 *) cvt->buf;
272  int i;
273 
274  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
275 
276  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
277  const float sample = *src;
278  if (sample > 1.0f) {
279  *dst = 2147483647;
280  } else if (sample < -1.0f) {
281  *dst = -2147483647;
282  } else {
283  *dst = (Sint32)((double)sample * 2147483647.0);
284  }
285  }
286 
287  if (cvt->filters[++cvt->filter_index]) {
288  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
289  }
290 }
291 #endif
292 
293 
294 #if HAVE_SSE2_INTRINSICS
/*
 * SSE2 converter: expand signed 8-bit samples in cvt->buf to float32, in place.
 *
 * The buffer grows fourfold, so samples are processed from the end towards the
 * start. A scalar loop runs until a 16-float store would be 16-byte aligned;
 * 16-sample SSE2 blocks follow only if the source turns out to be aligned as
 * well; whatever is left is finished one sample at a time. Afterwards
 * cvt->len_cvt is multiplied by four and the next filter in the chain, if any,
 * is called with AUDIO_F32SYS. `format` is not consulted.
 */
295 static void SDLCALL
296 SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
297 {
/* Both cursors start on the LAST input sample / LAST output slot. */
298  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
299  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
300  int i;
301 
302  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
303 
304  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
/* (dst-15) is where a block of 16 floats ending at dst would begin. */
305  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
306  *dst = ((float) *src) * DIVBY128;
307  }
308 
309  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
310  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
311 
312  /* Make sure src is aligned too. */
313  if ((((size_t) src) & 15) == 0) {
314  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
315  const __m128i *mmsrc = (const __m128i *) src;
316  const __m128i zero = _mm_setzero_si128();
317  const __m128 divby128 = _mm_set1_ps(DIVBY128);
318  while (i >= 16) { /* 16 * 8-bit */
/* All 16 input bytes are loaded before any of the 64 output bytes are stored. */
319  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 sint8 into an XMM register. */
320  /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
321  const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
322  /* right-shift-sign-extend gets us sint16 with the other set of values. */
323  const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
324  /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
325  const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
326  const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
327  const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
328  const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
329  /* Interleave back into correct order, store. */
330  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
331  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
332  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
333  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
334  i -= 16; mmsrc--; dst -= 16;
335  }
336 
/* Carry the block cursor back into the byte cursor for the scalar tail. */
337  src = (const Sint8 *) mmsrc;
338  }
339 
340  src += 15; dst += 15; /* adjust for any scalar finishing. */
341 
342  /* Finish off any leftovers with scalar operations. */
343  while (i) {
344  *dst = ((float) *src) * DIVBY128;
345  i--; src--; dst--;
346  }
347 
348  cvt->len_cvt *= 4;
349  if (cvt->filters[++cvt->filter_index]) {
350  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
351  }
352 }
353 
/*
 * SSE2 converter: expand unsigned 8-bit samples in cvt->buf to float32, in place.
 *
 * The buffer grows fourfold, so samples are processed from the end towards the
 * start. A scalar loop runs until a 16-float store would be 16-byte aligned;
 * 16-sample SSE2 blocks follow only if the source turns out to be aligned as
 * well; whatever is left is finished one sample at a time. Every sample is
 * scaled by 1/128 and shifted down by 1.0. Afterwards cvt->len_cvt is
 * multiplied by four and the next filter in the chain, if any, is called with
 * AUDIO_F32SYS. `format` is not consulted.
 */
354 static void SDLCALL
355 SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
356 {
/* Both cursors start on the LAST input sample / LAST output slot. */
357  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
358  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
359  int i;
360 
361  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
362 
363  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
/* (dst-15) is where a block of 16 floats ending at dst would begin. */
364  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
365  *dst = (((float) *src) * DIVBY128) - 1.0f;
366  }
367 
368  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
369  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
370 
371  /* Make sure src is aligned too. */
372  if ((((size_t) src) & 15) == 0) {
373  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
374  const __m128i *mmsrc = (const __m128i *) src;
375  const __m128i zero = _mm_setzero_si128();
376  const __m128 divby128 = _mm_set1_ps(DIVBY128);
377  const __m128 minus1 = _mm_set1_ps(-1.0f);
378  while (i >= 16) { /* 16 * 8-bit */
/* All 16 input bytes are loaded before any of the 64 output bytes are stored. */
379  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 uint8 into an XMM register. */
380  /* treat as int16, shift left to clear every other sint16, then back right with zero-extend. Now uint16. */
381  const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
382  /* right-shift-zero-extend gets us uint16 with the other set of values. */
383  const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
384  /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
385  /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
386  const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
387  const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
388  const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
389  const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
390  /* Interleave back into correct order, store. */
391  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
392  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
393  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
394  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
395  i -= 16; mmsrc--; dst -= 16;
396  }
397 
/* Carry the block cursor back into the byte cursor for the scalar tail. */
398  src = (const Uint8 *) mmsrc;
399  }
400 
401  src += 15; dst += 15; /* adjust for any scalar finishing. */
402 
403  /* Finish off any leftovers with scalar operations. */
404  while (i) {
405  *dst = (((float) *src) * DIVBY128) - 1.0f;
406  i--; src--; dst--;
407  }
408 
409  cvt->len_cvt *= 4;
410  if (cvt->filters[++cvt->filter_index]) {
411  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
412  }
413 }
414 
/*
 * SSE2 converter: expand signed 16-bit samples in cvt->buf to float32, in place.
 *
 * The buffer doubles in size, so samples are processed from the end towards
 * the start. A scalar loop runs until an 8-float store would be 16-byte
 * aligned; 8-sample SSE2 blocks follow only if the source turns out to be
 * aligned as well; whatever is left is finished one sample at a time.
 * Afterwards cvt->len_cvt is doubled and the next filter in the chain, if any,
 * is called with AUDIO_F32SYS. `format` is not consulted.
 */
415 static void SDLCALL
416 SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
417 {
/* Both cursors start on the LAST input sample / LAST output slot. */
418  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
419  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
420  int i;
421 
422  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
423 
424  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
/* (dst-7) is where a block of 8 floats ending at dst would begin. */
425  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
426  *dst = ((float) *src) * DIVBY32768;
427  }
428 
429  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
430  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
431 
432  /* Make sure src is aligned too. */
433  if ((((size_t) src) & 15) == 0) {
434  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
435  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
436  while (i >= 8) { /* 8 * 16-bit */
/* All 8 input samples are loaded before any of the 32 output bytes are stored. */
437  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
438  /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
439  const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
440  /* right-shift-sign-extend gets us sint32 with the other set of values. */
441  const __m128i b = _mm_srai_epi32(ints, 16);
442  /* Interleave these back into the right order, convert to float, multiply, store. */
443  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
444  _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
445  i -= 8; src -= 8; dst -= 8;
446  }
447  }
448 
449  src += 7; dst += 7; /* adjust for any scalar finishing. */
450 
451  /* Finish off any leftovers with scalar operations. */
452  while (i) {
453  *dst = ((float) *src) * DIVBY32768;
454  i--; src--; dst--;
455  }
456 
457  cvt->len_cvt *= 2;
458  if (cvt->filters[++cvt->filter_index]) {
459  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
460  }
461 }
462 
463 static void SDLCALL
464 SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
465 {
466  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
467  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
468  int i;
469 
470  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
471 
472  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
473  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
474  *dst = (((float) *src) * DIVBY32768) - 1.0f;
475  }
476 
477  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
478  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
479 
480  /* Make sure src is aligned too. */
481  if ((((size_t) src) & 15) == 0) {
482  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
483  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
484  const __m128 minus1 = _mm_set1_ps(1.0f);
485  while (i >= 8) { /* 8 * 16-bit */
486  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
487  /* treat as int32, shift left to clear every other sint16, then back right with zero-extend. Now sint32. */
488  const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
489  /* right-shift-sign-extend gets us sint32 with the other set of values. */
490  const __m128i b = _mm_srli_epi32(ints, 16);
491  /* Interleave these back into the right order, convert to float, multiply, store. */
492  _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
493  _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
494  i -= 8; src -= 8; dst -= 8;
495  }
496  }
497 
498  src += 7; dst += 7; /* adjust for any scalar finishing. */
499 
500  /* Finish off any leftovers with scalar operations. */
501  while (i) {
502  *dst = (((float) *src) * DIVBY32768) - 1.0f;
503  i--; src--; dst--;
504  }
505 
506  cvt->len_cvt *= 2;
507  if (cvt->filters[++cvt->filter_index]) {
508  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
509  }
510 }
511 
/*
 * SSE2 converter: convert signed 32-bit samples in cvt->buf to float32, in place.
 *
 * Input and output samples have the same size, so the buffer is walked front
 * to back and cvt->len_cvt is left alone. A scalar loop runs until the cursor
 * is 16-byte aligned, 4-sample SSE2 blocks follow, and whatever is left is
 * finished one sample at a time. The scaling by 1/2147483648 is carried out in
 * double precision in every path. Finally the next filter in the chain, if
 * any, is called with AUDIO_F32SYS. `format` is not consulted.
 */
512 static void SDLCALL
513 SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
514 {
/* src and dst start at the same address and advance in lockstep. */
515  const Sint32 *src = (const Sint32 *) cvt->buf;
516  float *dst = (float *) cvt->buf;
517  int i;
518 
519  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
520 
521  /* Get dst aligned to 16 bytes */
522  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
523  *dst = (float) (((double) *src) * DIVBY2147483648);
524  }
525 
/* Same address for both cursors, so aligning dst has aligned src as well. */
526  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
527  SDL_assert(!i || ((((size_t) src) & 15) == 0));
528 
529  {
530  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
531  const __m128d divby2147483648 = _mm_set1_pd(DIVBY2147483648);
532  const __m128i *mmsrc = (const __m128i *) src;
533  while (i >= 4) { /* 4 * sint32 */
534  const __m128i ints = _mm_load_si128(mmsrc);
535  /* bitshift the whole register over, so _mm_cvtepi32_pd can read the top ints in the bottom of the vector. */
536  const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483648);
537  const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483648);
538  /* convert to float32, bitshift/or to get these into a vector to store. */
539  _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
540  i -= 4; mmsrc++; dst += 4;
541  }
/* Carry the block cursor back into the sample cursor for the scalar tail. */
542  src = (const Sint32 *) mmsrc;
543  }
544 
545  /* Finish off any leftovers with scalar operations. */
546  while (i) {
547  *dst = (float) (((double) *src) * DIVBY2147483648);
548  i--; src++; dst++;
549  }
550 
551  if (cvt->filters[++cvt->filter_index]) {
552  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
553  }
554 }
555 
556 static void SDLCALL
557 SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
558 {
559  const float *src = (const float *) cvt->buf;
560  Sint8 *dst = (Sint8 *) cvt->buf;
561  int i;
562 
563  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
564 
565  /* Get dst aligned to 16 bytes */
566  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
567  *dst = (Sint8) (*src * 127.0f);
568  }
569 
570  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
571 
572  /* Make sure src is aligned too. */
573  if ((((size_t) src) & 15) == 0) {
574  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
575  const __m128 mulby127 = _mm_set1_ps(127.0f);
576  __m128i *mmdst = (__m128i *) dst;
577  while (i >= 16) { /* 16 * float32 */
578  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby127)); /* load 4 floats, convert to sint32 */
579  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby127)); /* load 4 floats, convert to sint32 */
580  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+8), mulby127)); /* load 4 floats, convert to sint32 */
581  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+12), mulby127)); /* load 4 floats, convert to sint32 */
582  _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
583  i -= 16; src += 16; mmdst++;
584  }
585  dst = (Sint8 *) mmdst;
586  }
587 
588  /* Finish off any leftovers with scalar operations. */
589  while (i) {
590  *dst = (Sint8) (*src * 127.0f);
591  i--; src++; dst++;
592  }
593 
594  cvt->len_cvt /= 4;
595  if (cvt->filters[++cvt->filter_index]) {
596  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
597  }
598 }
599 
600 static void SDLCALL
601 SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
602 {
603  const float *src = (const float *) cvt->buf;
604  Uint8 *dst = (Uint8 *) cvt->buf;
605  int i;
606 
607  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
608 
609  /* Get dst aligned to 16 bytes */
610  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
611  *dst = (Uint8) ((*src + 1.0f) * 127.0f);
612  }
613 
614  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
615 
616  /* Make sure src is aligned too. */
617  if ((((size_t) src) & 15) == 0) {
618  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
619  const __m128 add1 = _mm_set1_ps(1.0f);
620  const __m128 mulby127 = _mm_set1_ps(127.0f);
621  __m128i *mmdst = (__m128i *) dst;
622  while (i >= 16) { /* 16 * float32 */
623  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src), add1), mulby127)); /* load 4 floats, convert to sint32 */
624  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+4), add1), mulby127)); /* load 4 floats, convert to sint32 */
625  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+8), add1), mulby127)); /* load 4 floats, convert to sint32 */
626  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+12), add1), mulby127)); /* load 4 floats, convert to sint32 */
627  _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
628  i -= 16; src += 16; mmdst++;
629  }
630  dst = (Uint8 *) mmdst;
631  }
632 
633  /* Finish off any leftovers with scalar operations. */
634  while (i) {
635  *dst = (Uint8) ((*src + 1.0f) * 127.0f);
636  i--; src++; dst++;
637  }
638 
639  cvt->len_cvt /= 4;
640  if (cvt->filters[++cvt->filter_index]) {
641  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
642  }
643 }
644 
645 static void SDLCALL
646 SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
647 {
648  const float *src = (const float *) cvt->buf;
649  Sint16 *dst = (Sint16 *) cvt->buf;
650  int i;
651 
652  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
653 
654  /* Get dst aligned to 16 bytes */
655  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
656  *dst = (Sint16) (*src * 32767.0f);
657  }
658 
659  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
660 
661  /* Make sure src is aligned too. */
662  if ((((size_t) src) & 15) == 0) {
663  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
664  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
665  __m128i *mmdst = (__m128i *) dst;
666  while (i >= 8) { /* 8 * float32 */
667  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767)); /* load 4 floats, convert to sint32 */
668  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767)); /* load 4 floats, convert to sint32 */
669  _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2)); /* pack to sint16, store out. */
670  i -= 8; src += 8; mmdst++;
671  }
672  dst = (Sint16 *) mmdst;
673  }
674 
675  /* Finish off any leftovers with scalar operations. */
676  while (i) {
677  *dst = (Sint16) (*src * 32767.0f);
678  i--; src++; dst++;
679  }
680 
681  cvt->len_cvt /= 2;
682  if (cvt->filters[++cvt->filter_index]) {
683  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
684  }
685 }
686 
687 static void SDLCALL
688 SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
689 {
690  const float *src = (const float *) cvt->buf;
691  Uint16 *dst = (Uint16 *) cvt->buf;
692  int i;
693 
694  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
695 
696  /* Get dst aligned to 16 bytes */
697  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
698  *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
699  }
700 
701  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
702 
703  /* Make sure src is aligned too. */
704  if ((((size_t) src) & 15) == 0) {
705  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
706  /* This calculates differently than the scalar path because SSE2 can't
707  pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
708  saturation, so that would corrupt our data. _mm_packus_epi32 exists,
709  but not before SSE 4.1. So we convert from float to sint16, packing
710  that down with legit signed saturation, and then xor the top bit
711  against 1. This results in the correct unsigned 16-bit value, even
712  though it looks like dark magic. */
713  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
714  const __m128i topbit = _mm_set1_epi16(-32768);
715  __m128i *mmdst = (__m128i *) dst;
716  while (i >= 8) { /* 8 * float32 */
717  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767)); /* load 4 floats, convert to sint32 */
718  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767)); /* load 4 floats, convert to sint32 */
719  _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit)); /* pack to sint16, xor top bit, store out. */
720  i -= 8; src += 8; mmdst++;
721  }
722  dst = (Uint16 *) mmdst;
723  }
724 
725  /* Finish off any leftovers with scalar operations. */
726  while (i) {
727  *dst = (Uint16) ((*src + 1.0f) * 32767.0f);
728  i--; src++; dst++;
729  }
730 
731  cvt->len_cvt /= 2;
732  if (cvt->filters[++cvt->filter_index]) {
733  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
734  }
735 }
736 
737 static void SDLCALL
738 SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
739 {
740  const float *src = (const float *) cvt->buf;
741  Sint32 *dst = (Sint32 *) cvt->buf;
742  int i;
743 
744  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
745 
746  /* Get dst aligned to 16 bytes */
747  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
748  *dst = (Sint32) (((double) *src) * 2147483647.0);
749  }
750 
751  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
752  SDL_assert(!i || ((((size_t) src) & 15) == 0));
753 
754  {
755  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
756  const __m128d mulby2147483647 = _mm_set1_pd(2147483647.0);
757  __m128i *mmdst = (__m128i *) dst;
758  while (i >= 4) { /* 4 * float32 */
759  const __m128 floats = _mm_load_ps(src);
760  /* bitshift the whole register over, so _mm_cvtps_pd can read the top floats in the bottom of the vector. */
761  const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
762  const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
763  _mm_store_si128(mmdst, _mm_or_si128(_mm_slli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
764  i -= 4; src += 4; mmdst++;
765  }
766  dst = (Sint32 *) mmdst;
767  }
768 
769  /* Finish off any leftovers with scalar operations. */
770  while (i) {
771  *dst = (Sint32) (((double) *src) * 2147483647.0);
772  i--; src++; dst++;
773  }
774 
775  if (cvt->filters[++cvt->filter_index]) {
776  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
777  }
778 }
779 #endif
780 
781 
783 {
784  static SDL_bool converters_chosen = SDL_FALSE;
785 
786  if (converters_chosen) {
787  return;
788  }
789 
790 #define SET_CONVERTER_FUNCS(fntype) \
791  SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
792  SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
793  SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
794  SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
795  SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
796  SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
797  SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
798  SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
799  SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
800  SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
801  converters_chosen = SDL_TRUE
802 
803 #if HAVE_SSE2_INTRINSICS
804  if (SDL_HasSSE2()) {
805  SET_CONVERTER_FUNCS(SSE2);
806  return;
807  }
808 #endif
809 
810 #if NEED_SCALAR_CONVERTER_FALLBACKS
811  SET_CONVERTER_FUNCS(Scalar);
812 #endif
813 
814 #undef SET_CONVERTER_FUNCS
815 
816  SDL_assert(converters_chosen == SDL_TRUE);
817 }
818 
819 /* vi: set ts=4 sw=4 expandtab: */
#define LOG_DEBUG_CONVERT(from, to)
Definition: SDL_audio_c.h:34
GLenum GLenum dst
Uint8 * buf
Definition: SDL_audio.h:231
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
int filter_index
Definition: SDL_audio.h:237
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
GLfloat f
Uint16 SDL_AudioFormat
Audio format flags.
Definition: SDL_audio.h:64
GLenum src
#define AUDIO_S16SYS
Definition: SDL_audio.h:123
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
Definition: SDL_audio.h:225
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: SDL_opengl.h:1572
unsigned int size_t
#define AUDIO_U8
Definition: SDL_audio.h:89
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
Definition: SDL_audio.h:236
#define AUDIO_F32SYS
Definition: SDL_audio.h:125
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int8_t Sint8
Definition: SDL_stdinc.h:151
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
uint8_t Uint8
Definition: SDL_stdinc.h:157
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int32_t Sint32
Definition: SDL_stdinc.h:175
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Definition: SDL_audio.h:192
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define SDL_assert(condition)
Definition: SDL_assert.h:169
#define NULL
Definition: begin_code.h:164
SDL_bool
Definition: SDL_stdinc.h:139
#define DIVBY2147483648
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define SDL_HasSSE2
SDL_AudioFilter SDL_Convert_S8_to_F32
#define AUDIO_S32SYS
Definition: SDL_audio.h:124
uint16_t Uint16
Definition: SDL_stdinc.h:169
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define DIVBY32768
#define DIVBY128
SDL_AudioFilter SDL_Convert_U16_to_F32
zero
Definition: e_pow.c:78
GLboolean GLboolean GLboolean GLboolean a
#define AUDIO_S8
Definition: SDL_audio.h:90
SDL_AudioFilter SDL_Convert_S32_to_F32
#define SDLCALL
Definition: SDL_internal.h:45
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)
#define AUDIO_U16SYS
Definition: SDL_audio.h:122
int16_t Sint16
Definition: SDL_stdinc.h:163