54 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
55 #define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
62 #include <immintrin.h>
/*
 * AVX2 kernel, aligned flavour: walks the input as a stream of int16_t and
 * writes num_points int16_t values to iBuffer -- 16 per vector iteration,
 * then one at a time for the remainder.
 * NOTE(review): the name indicates this extracts the real component of
 * interleaved 16-bit complex samples; the element layout of complexVector is
 * not visible here -- confirm.
 * NOTE(review): the line declaring the complexVector parameter, most of the
 * mask arguments and the closing braces are not visible in this listing.
 */
64 static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
66 unsigned int num_points)
68 unsigned int number = 0;
/* Address the input as individual int16_t elements. */
69 const int16_t* complexVectorPtr = (int16_t*)complexVector;
70 int16_t* iBufferPtr = iBuffer;
/*
 * Control vectors for _mm256_shuffle_epi8. A control byte with its high bit
 * set (0x80) zeroes the matching output byte; any other value selects a source
 * byte from the same 128-bit lane. Only the first argument of each mask is
 * visible here.
 * NOTE(review): presumably the two masks fill complementary halves of each
 * lane so the OR below can merge them -- confirm against the full masks.
 */
72 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
104 __m256i iMoveMask2 = _mm256_set_epi8(13,
137 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full 16-output vector iterations. */
139 unsigned int sixteenthPoints = num_points / 16;
141 for (number = 0; number < sixteenthPoints; number++) {
/* Two aligned 256-bit loads of 16 int16_t each; the aligned load requires
 * a 32-byte-aligned address. */
142 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
143 complexVectorPtr += 16;
144 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
145 complexVectorPtr += 16;
/* Rearrange the bytes of each input vector according to its mask. */
147 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
148 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled vectors into one. */
150 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* Immediate 0xd8 selects 64-bit elements 0, 2, 1, 3 in that order, i.e. it
 * swaps the two middle quarters. The byte shuffle works per 128-bit lane, so
 * this presumably restores sequential output order -- confirm. */
151 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Aligned 256-bit store of 16 int16_t results.
 * NOTE(review): no advance of iBufferPtr is visible after this store in the
 * listing -- confirm the full source steps it by 16. */
153 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
/* Scalar remainder: resume at the first point the vector loop did not cover. */
158 number = sixteenthPoints * 16;
159 for (; number < num_points; number++) {
/* NOTE(review): as shown, the source pointer advances one element per output;
 * a skip over the paired element is not visible in this listing -- confirm. */
160 *iBufferPtr++ = *complexVectorPtr++;
167 #include <tmmintrin.h>
/*
 * 128-bit kernel built on _mm_shuffle_epi8 (an SSSE3 instruction): walks the
 * input as a stream of int16_t and writes num_points int16_t values to
 * iBuffer -- 8 per vector iteration, then one at a time for the remainder.
 * NOTE(review): the line carrying the function name and the iBuffer /
 * complexVector parameters, and the closing braces, are not visible in this
 * listing.
 */
171 unsigned int num_points)
173 unsigned int number = 0;
/* Address the input as individual int16_t elements. */
174 const int16_t* complexVectorPtr = (int16_t*)complexVector;
175 int16_t* iBufferPtr = iBuffer;
/*
 * _mm_set_epi8 lists bytes from most to least significant. iMoveMask1 packs
 * source bytes 0,1,4,5,8,9,12,13 (int16_t elements 0, 2, 4, 6) into the low
 * 8 output bytes and zeroes the high 8 (0x80 = "write zero").
 */
177 __m128i iMoveMask1 = _mm_set_epi8(
178 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
/* iMoveMask2 packs the same source bytes into the high 8 output bytes and
 * zeroes the low 8. */
179 __m128i iMoveMask2 = _mm_set_epi8(
180 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
182 __m128i complexVal1, complexVal2, iOutputVal;
/* Number of full 8-output vector iterations. */
184 unsigned int eighthPoints = num_points / 8;
186 for (number = 0; number < eighthPoints; number++) {
/* Two aligned 128-bit loads of 8 int16_t each; the aligned load requires
 * a 16-byte-aligned address. */
187 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
188 complexVectorPtr += 8;
189 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
190 complexVectorPtr += 8;
/* Even-indexed elements of the first load go to the low half, those of the
 * second load to the high half; all other bytes become zero. */
192 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
193 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
/* The halves do not overlap, so OR concatenates them into 8 results. */
195 iOutputVal = _mm_or_si128(complexVal1, complexVal2);
/* Aligned 128-bit store of 8 int16_t results.
 * NOTE(review): no advance of iBufferPtr is visible after this store in the
 * listing -- confirm the full source steps it by 8. */
197 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
/* Scalar remainder: resume at the first point the vector loop did not cover. */
202 number = eighthPoints * 8;
203 for (; number < num_points; number++) {
/* NOTE(review): as shown, the source pointer advances one element per output;
 * a skip over the paired element is not visible in this listing -- confirm. */
204 *iBufferPtr++ = *complexVectorPtr++;
212 #include <emmintrin.h>
/*
 * 128-bit kernel using only word/dword shuffles and bit masks: walks the
 * input as a stream of int16_t and writes num_points int16_t values to
 * iBuffer -- 8 per vector iteration, then one at a time for the remainder.
 * NOTE(review): the line carrying the function name and the iBuffer /
 * complexVector parameters, and the closing braces, are not visible in this
 * listing.
 */
216 unsigned int num_points)
218 unsigned int number = 0;
/* Address the input as individual int16_t elements. */
219 const int16_t* complexVectorPtr = (int16_t*)complexVector;
220 int16_t* iBufferPtr = iBuffer;
221 __m128i complexVal1, complexVal2, iOutputVal;
/* _mm_set_epi32 lists elements from most to least significant: lowMask keeps
 * the low 64 bits, highMask keeps the high 64 bits. */
222 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
223 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
/* Number of full 8-output vector iterations. */
225 unsigned int eighthPoints = num_points / 8;
227 for (number = 0; number < eighthPoints; number++) {
/* Two aligned 128-bit loads of 8 int16_t each; the aligned load requires
 * a 16-byte-aligned address. */
228 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
229 complexVectorPtr += 8;
230 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
231 complexVectorPtr += 8;
/* Reorder the low four words as 0,2,1,3 so even-indexed words are adjacent. */
233 complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
/* Same reordering for the high four words. */
235 complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
/* Reorder 32-bit elements as 0,2,1,3: the four even-indexed words of the
 * original vector now occupy the low 64 bits. */
237 complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
/* Same word-level reordering for the second load. */
239 complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
241 complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
/* 32-bit element order 1,3,0,2: here the even-indexed words end up in the
 * high 64 bits instead. */
243 complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
/* Keep the low half of the first vector and the high half of the second,
 * then combine them into 8 results. */
245 iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask),
246 _mm_and_si128(complexVal2, highMask));
/* Aligned 128-bit store of 8 int16_t results.
 * NOTE(review): no advance of iBufferPtr is visible after this store in the
 * listing -- confirm the full source steps it by 8. */
248 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
/* Scalar remainder: resume at the first point the vector loop did not cover. */
253 number = eighthPoints * 8;
254 for (; number < num_points; number++) {
/* NOTE(review): as shown, the source pointer advances one element per output;
 * a skip over the paired element is not visible in this listing -- confirm. */
255 *iBufferPtr++ = *complexVectorPtr++;
261 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar version: walks the input as a stream of int16_t and writes
 * num_points int16_t values to iBuffer, one per loop iteration.
 * NOTE(review): the line carrying the function name and the iBuffer /
 * complexVector parameters, and the closing braces, are not visible in this
 * listing.
 */
265 unsigned int num_points)
267 unsigned int number = 0;
/* Address the input as individual int16_t elements. */
268 const int16_t* complexVectorPtr = (int16_t*)complexVector;
269 int16_t* iBufferPtr = iBuffer;
270 for (number = 0; number < num_points; number++) {
/* NOTE(review): as shown, the source pointer advances one element per output;
 * a skip over the paired element is not visible in this listing -- confirm. */
271 *iBufferPtr++ = *complexVectorPtr++;
281 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H
282 #define INCLUDED_volk_16ic_deinterleave_real_16i_u_H
284 #include <inttypes.h>
289 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned flavour: walks the input as a stream of int16_t and
 * writes num_points int16_t values to iBuffer -- 16 per vector iteration,
 * then one at a time for the remainder. Uses unaligned load/store intrinsics,
 * so the vector accesses carry no alignment requirement.
 * NOTE(review): the name indicates this extracts the real component of
 * interleaved 16-bit complex samples; the element layout of complexVector is
 * not visible here -- confirm.
 * NOTE(review): the line declaring the complexVector parameter, most of the
 * mask arguments and the closing braces are not visible in this listing.
 */
291 static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
293 unsigned int num_points)
295 unsigned int number = 0;
/* Address the input as individual int16_t elements. */
296 const int16_t* complexVectorPtr = (int16_t*)complexVector;
297 int16_t* iBufferPtr = iBuffer;
/*
 * Control vectors for _mm256_shuffle_epi8. A control byte with its high bit
 * set (0x80) zeroes the matching output byte; any other value selects a source
 * byte from the same 128-bit lane. Only the first argument of each mask is
 * visible here.
 * NOTE(review): presumably the two masks fill complementary halves of each
 * lane so the OR below can merge them -- confirm against the full masks.
 */
299 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
331 __m256i iMoveMask2 = _mm256_set_epi8(13,
364 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full 16-output vector iterations. */
366 unsigned int sixteenthPoints = num_points / 16;
368 for (number = 0; number < sixteenthPoints; number++) {
/* Two unaligned 256-bit loads of 16 int16_t each. */
369 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
370 complexVectorPtr += 16;
371 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
372 complexVectorPtr += 16;
/* Rearrange the bytes of each input vector according to its mask. */
374 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
375 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled vectors into one. */
377 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* Immediate 0xd8 selects 64-bit elements 0, 2, 1, 3 in that order, i.e. it
 * swaps the two middle quarters. The byte shuffle works per 128-bit lane, so
 * this presumably restores sequential output order -- confirm. */
378 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Unaligned 256-bit store of 16 int16_t results.
 * NOTE(review): no advance of iBufferPtr is visible after this store in the
 * listing -- confirm the full source steps it by 16. */
380 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
/* Scalar remainder: resume at the first point the vector loop did not cover. */
385 number = sixteenthPoints * 16;
386 for (; number < num_points; number++) {
/* NOTE(review): as shown, the source pointer advances one element per output;
 * a skip over the paired element is not visible in this listing -- confirm. */
387 *iBufferPtr++ = *complexVectorPtr++;