71 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
72 #define INCLUDED_volk_32f_s32f_power_32f_a_H
79 #include <tmmintrin.h>
81 #ifdef LV_HAVE_LIB_SIMDMATH
/*
 * Raises each of num_points input floats to `power` and writes the results
 * through cVector, one output per input.
 *
 * cVector     destination buffer; written via cPtr.
 * num_points  number of elements processed.
 * aVector and power are used below; their declarations fall on lines that are
 * not visible in this excerpt.
 *
 * When LV_HAVE_LIB_SIMDMATH is defined, groups of four floats go through a
 * vector path built on powf4; the remaining elements use scalar powf.
 *
 * NOTE(review): _mm_blendv_ps is an SSE4.1 intrinsic normally declared in
 * <smmintrin.h>, while the include visible above is <tmmintrin.h> - confirm
 * the intended header.
 */
85 static inline void volk_32f_s32f_power_32f_a_sse4_1(
float* cVector,
88 unsigned int num_points)
90 unsigned int number = 0;
/* Working cursors over the output and input buffers. */
92 float* cPtr = cVector;
93 const float* aPtr = aVector;
/* Vector path; the matching #endif is not visible in this excerpt. */
95 #ifdef LV_HAVE_LIB_SIMDMATH
/* Number of complete groups of four floats. */
96 const unsigned int quarterPoints = num_points / 4;
/* The exponent replicated into all four lanes. */
97 __m128 vPower = _mm_set_ps1(power);
98 __m128 zeroValue = _mm_setzero_ps();
100 __m128 negatedValues;
/* Factor multiplied into lanes whose input was negative (see the blend below). */
101 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
/* Despite the name this holds the value 1.0f in every lane, not a bit mask. */
102 __m128 onesMask = _mm_set_ps1(1);
/*
 * NOTE(review): no advance of aPtr/cPtr is visible inside this loop in the
 * excerpt - confirm both move by four floats per iteration.
 */
105 for (; number < quarterPoints; number++) {
/* Aligned load: _mm_load_ps requires aPtr to be 16-byte aligned. */
107 aVal = _mm_load_ps(aPtr);
/* All-ones in every lane where the input is below zero. */
108 signMask = _mm_cmplt_ps(aVal, zeroValue);
109 negatedValues = _mm_sub_ps(zeroValue, aVal);
/* Replace negative lanes by their negation so powf4 is not given a negative base. */
110 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
/* powf4 is presumably the libsimdmath four-lane powf - TODO confirm. */
114 cVal = powf4(aVal, vPower);
/* Scale by powf(-1, power) where the input was negative and by 1 elsewhere. */
116 cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
/* Aligned store: _mm_store_ps requires cPtr to be 16-byte aligned. */
118 _mm_store_ps(cPtr, cVal);
/* First index not covered by a complete group of four; the scalar loop resumes here. */
124 number = quarterPoints * 4;
/* Scalar path: handles whatever the vector path did not (everything when it is compiled out). */
127 for (; number < num_points; number++) {
128 *cPtr++ = powf((*aPtr++), power);
136 #include <xmmintrin.h>
138 #ifdef LV_HAVE_LIB_SIMDMATH
139 #include <simdmath.h>
143 const float* aVector,
145 unsigned int num_points)
/*
 * Element-wise power: reads num_points floats through aVector and writes
 * powf(input, power) for each through cVector. The first line of this
 * definition's signature is not visible in this excerpt. This variant uses
 * only and/andnot/or bit operations for lane selection instead of
 * _mm_blendv_ps.
 */
147 unsigned int number = 0;
/* Working cursors over the output and input buffers. */
149 float* cPtr = cVector;
150 const float* aPtr = aVector;
/* Vector path; the matching #endif is not visible in this excerpt. */
152 #ifdef LV_HAVE_LIB_SIMDMATH
/* Number of complete groups of four floats. */
153 const unsigned int quarterPoints = num_points / 4;
/* The exponent replicated into all four lanes. */
154 __m128 vPower = _mm_set_ps1(power);
155 __m128 zeroValue = _mm_setzero_ps();
157 __m128 negatedValues;
/* Factor multiplied into lanes whose input was negative (see the select below). */
158 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
/* Despite the name this holds the value 1.0f in every lane, not a bit mask. */
159 __m128 onesMask = _mm_set_ps1(1);
/*
 * NOTE(review): no advance of aPtr/cPtr is visible inside this loop in the
 * excerpt - confirm both move by four floats per iteration.
 */
162 for (; number < quarterPoints; number++) {
/* Aligned load: _mm_load_ps requires aPtr to be 16-byte aligned. */
164 aVal = _mm_load_ps(aPtr);
/* All-ones in every lane where the input is below zero. */
165 signMask = _mm_cmplt_ps(aVal, zeroValue);
166 negatedValues = _mm_sub_ps(zeroValue, aVal);
/*
 * Bitwise select: lanes with signMask clear keep aVal, lanes with signMask
 * set take negatedValues.
 * NOTE(review): the left-hand side receiving this value is not visible in
 * this excerpt; powf4 below reads aVal - confirm the result is assigned to aVal.
 */
168 _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
/* powf4 is presumably the libsimdmath four-lane powf - TODO confirm. */
172 cVal = powf4(aVal, vPower);
/*
 * The same bitwise select picks the per-lane factor: 1 where signMask is
 * clear, powf(-1, power) where it is set. The second operand of the multiply
 * is on a line not shown in this excerpt.
 */
174 cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
175 _mm_and_ps(signMask, negativeOneToPower)),
/* Aligned store: _mm_store_ps requires cPtr to be 16-byte aligned. */
178 _mm_store_ps(cPtr, cVal);
/* First index not covered by a complete group of four; the scalar loop resumes here. */
184 number = quarterPoints * 4;
/* Scalar path: handles whatever the vector path did not (everything when it is compiled out). */
187 for (; number < num_points; number++) {
188 *cPtr++ = powf((*aPtr++), power);
195 #ifdef LV_HAVE_GENERIC
198 const float* aVector,
200 unsigned int num_points)
/*
 * Portable scalar version: for each of the num_points elements, reads one
 * float through aVector and writes powf(input, power) through cVector.
 * The first line of this definition's signature (and the declaration of
 * `power`) is not visible in this excerpt.
 */
202 float* cPtr = cVector;
203 const float* aPtr = aVector;
204 unsigned int number = 0;
/* One powf call per element; both cursors advance by one float each iteration. */
206 for (number = 0; number < num_points; number++) {
207 *cPtr++ = powf((*aPtr++), power);