22 #include "../SDL_internal.h" 29 #define HAVE_NEON_INTRINSICS 0 32 #define HAVE_SSE2_INTRINSICS 1 35 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS 36 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 37 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS 38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 39 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS 40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 41 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS 42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 46 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS 47 #define NEED_SCALAR_CONVERTER_FALLBACKS 1 63 #define DIVBY128 0.0078125f 64 #define DIVBY32768 0.000030517578125f 65 #define DIVBY2147483648 0.00000000046566128730773926 68 #if NEED_SCALAR_CONVERTER_FALLBACKS 78 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
97 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
98 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
149 float *
dst = (
float *) cvt->
buf;
166 const float *
src = (
const float *) cvt->
buf;
173 const float sample = *
src;
176 }
else if (sample < -1.0
f) {
192 const float *
src = (
const float *) cvt->
buf;
199 const float sample = *
src;
202 }
else if (sample < -1.0
f) {
205 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
218 const float *
src = (
const float *) cvt->
buf;
225 const float sample = *
src;
228 }
else if (sample < -1.0
f) {
244 const float *
src = (
const float *) cvt->
buf;
251 const float sample = *
src;
254 }
else if (sample < -1.0
f) {
270 const float *
src = (
const float *) cvt->
buf;
277 const float sample = *
src;
280 }
else if (sample < -1.0
f) {
283 *
dst = (
Sint32)((
double)sample * 2147483647.0);
294 #if HAVE_SSE2_INTRINSICS 305 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
309 src -= 15; dst -= 15;
310 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
313 if ((((
size_t) src) & 15) == 0) {
315 const __m128i *mmsrc = (
const __m128i *) src;
316 const __m128i
zero = _mm_setzero_si128();
317 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
319 const __m128i bytes = _mm_load_si128(mmsrc);
321 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
323 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
325 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
326 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
327 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
328 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
330 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
331 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
332 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
333 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
334 i -= 16; mmsrc--; dst -= 16;
337 src = (
const Sint8 *) mmsrc;
340 src += 15; dst += 15;
364 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
365 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
368 src -= 15; dst -= 15;
369 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
372 if ((((
size_t) src) & 15) == 0) {
374 const __m128i *mmsrc = (
const __m128i *) src;
375 const __m128i
zero = _mm_setzero_si128();
376 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
377 const __m128 minus1 = _mm_set1_ps(-1.0
f);
379 const __m128i bytes = _mm_load_si128(mmsrc);
381 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
383 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
386 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
387 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
388 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
389 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
391 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
392 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
393 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
394 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
395 i -= 16; mmsrc--; dst -= 16;
398 src = (
const Uint8 *) mmsrc;
401 src += 15; dst += 15;
405 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
430 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
433 if ((((
size_t) src) & 15) == 0) {
435 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
437 const __m128i ints = _mm_load_si128((__m128i
const *) src);
439 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
441 const __m128i
b = _mm_srai_epi32(ints, 16);
443 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
444 _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
445 i -= 8; src -= 8; dst -= 8;
478 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
481 if ((((
size_t) src) & 15) == 0) {
483 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
484 const __m128 minus1 = _mm_set1_ps(-1.0f);  /* bias must be negative: the zero-extended samples scaled by DIVBY32768 are >= 0, and adding -1.0f recentres them on zero (same bias the U8 SSE2 path uses) */
486 const __m128i ints = _mm_load_si128((__m128i
const *) src);
488 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
490 const __m128i
b = _mm_srli_epi32(ints, 16);
492 _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
493 _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
494 i -= 8; src -= 8; dst -= 8;
512 #if defined(__GNUC__) && (__GNUC__ < 4) 514 static inline __m128 _mm_castsi128_ps(__m128i __A) {
517 static inline __m128i _mm_castps_si128(__m128 __A) {
518 return (__m128i) __A;
526 float *
dst = (
float *) cvt->
buf;
536 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
537 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
542 const __m128i *mmsrc = (
const __m128i *) src;
544 const __m128i ints = _mm_load_si128(mmsrc);
546 const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483648);
547 const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483648);
549 _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
550 i -= 4; mmsrc++; dst += 4;
552 src = (
const Sint32 *) mmsrc;
569 const float *
src = (
const float *) cvt->
buf;
577 *dst = (
Sint8) (*src * 127.0
f);
580 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
583 if ((((
size_t) src) & 15) == 0) {
585 const __m128 mulby127 = _mm_set1_ps(127.0
f);
586 __m128i *mmdst = (__m128i *) dst;
588 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby127));
589 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby127));
590 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+8), mulby127));
591 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+12), mulby127));
592 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
593 i -= 16; src += 16; mmdst++;
595 dst = (
Sint8 *) mmdst;
600 *dst = (
Sint8) (*src * 127.0
f);
613 const float *
src = (
const float *) cvt->
buf;
621 *dst = (
Uint8) ((*src + 1.0
f) * 127.0f);
624 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
627 if ((((
size_t) src) & 15) == 0) {
629 const __m128 add1 = _mm_set1_ps(1.0
f);
630 const __m128 mulby127 = _mm_set1_ps(127.0
f);
631 __m128i *mmdst = (__m128i *) dst;
633 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src), add1), mulby127));
634 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+4), add1), mulby127));
635 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+8), add1), mulby127));
636 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+12), add1), mulby127));
637 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
638 i -= 16; src += 16; mmdst++;
640 dst = (
Uint8 *) mmdst;
645 *dst = (
Uint8) ((*src + 1.0
f) * 127.0f);
658 const float *
src = (
const float *) cvt->
buf;
666 *dst = (
Sint16) (*src * 32767.0
f);
669 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
672 if ((((
size_t) src) & 15) == 0) {
674 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
675 __m128i *mmdst = (__m128i *) dst;
677 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));
678 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));
679 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
680 i -= 8; src += 8; mmdst++;
687 *dst = (
Sint16) (*src * 32767.0
f);
700 const float *
src = (
const float *) cvt->
buf;
708 *dst = (
Uint16) ((*src + 1.0
f) * 32767.0f);
711 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
714 if ((((
size_t) src) & 15) == 0) {
723 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
724 const __m128i topbit = _mm_set1_epi16(-32768);
725 __m128i *mmdst = (__m128i *) dst;
727 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));
728 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));
729 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
730 i -= 8; src += 8; mmdst++;
737 *dst = (
Uint16) ((*src + 1.0
f) * 32767.0f);
750 const float *
src = (
const float *) cvt->
buf;
758 *dst = (
Sint32) (((
double) *
src) * 2147483647.0);
761 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
762 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
766 const __m128d mulby2147483647 = _mm_set1_pd(2147483647.0);
767 __m128i *mmdst = (__m128i *) dst;
769 const __m128 floats = _mm_load_ps(src);
771 const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
772 const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
773 _mm_store_si128(mmdst, _mm_or_si128(_mm_slli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
774 i -= 4; src += 4; mmdst++;
781 *dst = (
Sint32) (((
double) *
src) * 2147483647.0);
796 if (converters_chosen) {
800 #define SET_CONVERTER_FUNCS(fntype) \ 801 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ 802 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ 803 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \ 804 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \ 805 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \ 806 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \ 807 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ 808 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ 809 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \ 810 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ 811 converters_chosen = SDL_TRUE 813 #if HAVE_SSE2_INTRINSICS 820 #if NEED_SCALAR_CONVERTER_FALLBACKS 824 #undef SET_CONVERTER_FUNCS #define LOG_DEBUG_CONVERT(from, to)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
Uint16 SDL_AudioFormat
Audio format flags.
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
#define SDL_assert(condition)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S8_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
SDL_AudioFilter SDL_Convert_S32_to_F32
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)