22 #include "../SDL_internal.h" 29 #define HAVE_NEON_INTRINSICS 0 32 #define HAVE_SSE2_INTRINSICS 1 35 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS 36 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 37 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS 38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 39 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS 40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 41 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS 42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 46 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS 47 #define NEED_SCALAR_CONVERTER_FALLBACKS 1 63 #define DIVBY128 0.0078125f 64 #define DIVBY32768 0.000030517578125f 65 #define DIVBY2147483648 0.00000000046566128730773926 68 #if NEED_SCALAR_CONVERTER_FALLBACKS 78 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
97 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
98 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
149 float *
dst = (
float *) cvt->
buf;
166 const float *
src = (
const float *) cvt->
buf;
185 const float *
src = (
const float *) cvt->
buf;
204 const float *
src = (
const float *) cvt->
buf;
223 const float *
src = (
const float *) cvt->
buf;
242 const float *
src = (
const float *) cvt->
buf;
249 *
dst = (
Sint32) (((
double) *src) * 2147483647.0);
259 #if HAVE_SSE2_INTRINSICS 270 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
274 src -= 15; dst -= 15;
275 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
278 if ((((
size_t) src) & 15) == 0) {
280 const __m128i *mmsrc = (
const __m128i *) src;
281 const __m128i
zero = _mm_setzero_si128();
282 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
284 const __m128i bytes = _mm_load_si128(mmsrc);
286 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
288 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
290 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
291 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
292 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
293 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
295 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
296 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
297 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
298 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
299 i -= 16; mmsrc--; dst -= 16;
302 src = (
const Sint8 *) mmsrc;
305 src += 15; dst += 15;
329 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
330 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
333 src -= 15; dst -= 15;
334 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
337 if ((((
size_t) src) & 15) == 0) {
339 const __m128i *mmsrc = (
const __m128i *) src;
340 const __m128i
zero = _mm_setzero_si128();
341 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
342 const __m128 minus1 = _mm_set1_ps(-1.0
f);
344 const __m128i bytes = _mm_load_si128(mmsrc);
346 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
348 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
351 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
352 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
353 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
354 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
356 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
357 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
358 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
359 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
360 i -= 16; mmsrc--; dst -= 16;
363 src = (
const Uint8 *) mmsrc;
366 src += 15; dst += 15;
370 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
395 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
398 if ((((
size_t) src) & 15) == 0) {
400 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
402 const __m128i ints = _mm_load_si128((__m128i
const *) src);
404 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
406 const __m128i
b = _mm_srai_epi32(ints, 16);
408 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
409 _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
410 i -= 8; src -= 8; dst -= 8;
443 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
446 if ((((
size_t) src) & 15) == 0) {
448 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
449 const __m128 minus1 = _mm_set1_ps(1.0
f);
451 const __m128i ints = _mm_load_si128((__m128i
const *) src);
453 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
455 const __m128i
b = _mm_srli_epi32(ints, 16);
457 _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
458 _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
459 i -= 8; src -= 8; dst -= 8;
481 float *
dst = (
float *) cvt->
buf;
491 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
492 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
497 const __m128i *mmsrc = (
const __m128i *) src;
499 const __m128i ints = _mm_load_si128(mmsrc);
501 const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483648);
502 const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483648);
504 _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
505 i -= 4; mmsrc++; dst += 4;
507 src = (
const Sint32 *) mmsrc;
524 const float *
src = (
const float *) cvt->
buf;
532 *dst = (
Sint8) (*src * 127.0
f);
535 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
538 if ((((
size_t) src) & 15) == 0) {
540 const __m128 mulby127 = _mm_set1_ps(127.0
f);
541 __m128i *mmdst = (__m128i *) dst;
543 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby127));
544 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby127));
545 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+8), mulby127));
546 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+12), mulby127));
547 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
548 i -= 16; src += 16; mmdst++;
550 dst = (
Sint8 *) mmdst;
555 *dst = (
Sint8) (*src * 127.0
f);
568 const float *
src = (
const float *) cvt->
buf;
576 *dst = (
Uint8) ((*src + 1.0
f) * 127.0f);
579 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
582 if ((((
size_t) src) & 15) == 0) {
584 const __m128 add1 = _mm_set1_ps(1.0
f);
585 const __m128 mulby127 = _mm_set1_ps(127.0
f);
586 __m128i *mmdst = (__m128i *) dst;
588 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src), add1), mulby127));
589 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+4), add1), mulby127));
590 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+8), add1), mulby127));
591 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_load_ps(src+12), add1), mulby127));
592 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
593 i -= 16; src += 16; mmdst++;
595 dst = (
Uint8 *) mmdst;
600 *dst = (
Uint8) ((*src + 1.0
f) * 127.0f);
613 const float *
src = (
const float *) cvt->
buf;
621 *dst = (
Sint16) (*src * 32767.0
f);
624 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
627 if ((((
size_t) src) & 15) == 0) {
629 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
630 __m128i *mmdst = (__m128i *) dst;
632 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));
633 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));
634 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
635 i -= 8; src += 8; mmdst++;
642 *dst = (
Sint16) (*src * 32767.0
f);
655 const float *
src = (
const float *) cvt->
buf;
663 *dst = (
Uint16) ((*src + 1.0
f) * 32767.0f);
666 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
669 if ((((
size_t) src) & 15) == 0) {
678 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
679 const __m128i topbit = _mm_set1_epi16(-32768);
680 __m128i *mmdst = (__m128i *) dst;
682 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src), mulby32767));
683 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(src+4), mulby32767));
684 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
685 i -= 8; src += 8; mmdst++;
692 *dst = (
Uint16) ((*src + 1.0
f) * 32767.0f);
705 const float *
src = (
const float *) cvt->
buf;
713 *dst = (
Sint32) (((
double) *
src) * 2147483647.0);
716 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
717 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
721 const __m128d mulby2147483647 = _mm_set1_pd(2147483647.0);
722 __m128i *mmdst = (__m128i *) dst;
724 const __m128 floats = _mm_load_ps(src);
726 const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
727 const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
728 _mm_store_si128(mmdst, _mm_or_si128(_mm_slli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
729 i -= 4; src += 4; mmdst++;
736 *dst = (
Sint32) (((
double) *
src) * 2147483647.0);
751 if (converters_chosen) {
755 #define SET_CONVERTER_FUNCS(fntype) \ 756 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ 757 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ 758 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \ 759 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \ 760 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \ 761 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \ 762 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ 763 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ 764 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \ 765 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ 766 converters_chosen = SDL_TRUE 768 #if HAVE_SSE2_INTRINSICS 775 #if NEED_SCALAR_CONVERTER_FALLBACKS 779 #undef SET_CONVERTER_FUNCS #define LOG_DEBUG_CONVERT(from, to)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
Uint16 SDL_AudioFormat
Audio format flags.
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int8_t Sint8
A signed 8-bit integer type.
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
uint8_t Uint8
An unsigned 8-bit integer type.
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int32_t Sint32
A signed 32-bit integer type.
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
#define SDL_assert(condition)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S8_to_F32
uint16_t Uint16
An unsigned 16-bit integer type.
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
SDL_AudioFilter SDL_Convert_S32_to_F32
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)
int16_t Sint16
A signed 16-bit integer type.