Vector Optimized Library of Kernels  2.3
Architecture-tuned implementations of math kernels
volk_8ic_deinterleave_real_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
53 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
54 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
55 
56 #include <inttypes.h>
57 #include <stdio.h>
58 
59 
60 #ifdef LV_HAVE_AVX2
61 #include <immintrin.h>
62 
63 static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
64  const lv_8sc_t* complexVector,
65  unsigned int num_points)
66 {
67  unsigned int number = 0;
68  const int8_t* complexVectorPtr = (int8_t*)complexVector;
69  int16_t* iBufferPtr = iBuffer;
70  __m256i moveMask = _mm256_set_epi8(0x80,
71  0x80,
72  0x80,
73  0x80,
74  0x80,
75  0x80,
76  0x80,
77  0x80,
78  14,
79  12,
80  10,
81  8,
82  6,
83  4,
84  2,
85  0,
86  0x80,
87  0x80,
88  0x80,
89  0x80,
90  0x80,
91  0x80,
92  0x80,
93  0x80,
94  14,
95  12,
96  10,
97  8,
98  6,
99  4,
100  2,
101  0);
102  __m256i complexVal, outputVal;
103  __m128i outputVal0;
104 
105  unsigned int sixteenthPoints = num_points / 16;
106 
107  for (number = 0; number < sixteenthPoints; number++) {
108  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
109  complexVectorPtr += 32;
110 
111  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
112  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
113 
114  outputVal0 = _mm256_extractf128_si256(complexVal, 0);
115 
116  outputVal = _mm256_cvtepi8_epi16(outputVal0);
117  outputVal = _mm256_slli_epi16(outputVal, 7);
118 
119  _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
120 
121  iBufferPtr += 16;
122  }
123 
124  number = sixteenthPoints * 16;
125  for (; number < num_points; number++) {
126  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
127  complexVectorPtr++;
128  }
129 }
130 #endif /* LV_HAVE_AVX2 */
131 
132 #ifdef LV_HAVE_SSE4_1
133 #include <smmintrin.h>
134 
135 static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
136  const lv_8sc_t* complexVector,
137  unsigned int num_points)
138 {
139  unsigned int number = 0;
140  const int8_t* complexVectorPtr = (int8_t*)complexVector;
141  int16_t* iBufferPtr = iBuffer;
142  __m128i moveMask = _mm_set_epi8(
143  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
144  __m128i complexVal, outputVal;
145 
146  unsigned int eighthPoints = num_points / 8;
147 
148  for (number = 0; number < eighthPoints; number++) {
149  complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
150  complexVectorPtr += 16;
151 
152  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
153 
154  outputVal = _mm_cvtepi8_epi16(complexVal);
155  outputVal = _mm_slli_epi16(outputVal, 7);
156 
157  _mm_store_si128((__m128i*)iBufferPtr, outputVal);
158  iBufferPtr += 8;
159  }
160 
161  number = eighthPoints * 8;
162  for (; number < num_points; number++) {
163  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
164  complexVectorPtr++;
165  }
166 }
167 #endif /* LV_HAVE_SSE4_1 */
168 
169 
170 #ifdef LV_HAVE_AVX
171 #include <immintrin.h>
172 
173 static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer,
174  const lv_8sc_t* complexVector,
175  unsigned int num_points)
176 {
177  unsigned int number = 0;
178  const int8_t* complexVectorPtr = (int8_t*)complexVector;
179  int16_t* iBufferPtr = iBuffer;
180  __m128i moveMask = _mm_set_epi8(
181  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
182  __m256i complexVal, outputVal;
183  __m128i complexVal1, complexVal0, outputVal1, outputVal0;
184 
185  unsigned int sixteenthPoints = num_points / 16;
186 
187  for (number = 0; number < sixteenthPoints; number++) {
188  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
189  complexVectorPtr += 32;
190 
191  complexVal1 = _mm256_extractf128_si256(complexVal, 1);
192  complexVal0 = _mm256_extractf128_si256(complexVal, 0);
193 
194  outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
195  outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
196 
197  outputVal1 = _mm_cvtepi8_epi16(outputVal1);
198  outputVal1 = _mm_slli_epi16(outputVal1, 7);
199  outputVal0 = _mm_cvtepi8_epi16(outputVal0);
200  outputVal0 = _mm_slli_epi16(outputVal0, 7);
201 
202  __m256i dummy = _mm256_setzero_si256();
203  outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
204  outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
205  _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
206 
207  iBufferPtr += 16;
208  }
209 
210  number = sixteenthPoints * 16;
211  for (; number < num_points; number++) {
212  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
213  complexVectorPtr++;
214  }
215 }
216 #endif /* LV_HAVE_AVX */
217 
218 
219 #ifdef LV_HAVE_GENERIC
220 
221 static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer,
222  const lv_8sc_t* complexVector,
223  unsigned int num_points)
224 {
225  unsigned int number = 0;
226  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
227  int16_t* iBufferPtr = iBuffer;
228  for (number = 0; number < num_points; number++) {
229  *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
230  complexVectorPtr++;
231  }
232 }
233 #endif /* LV_HAVE_GENERIC */
234 
235 
236 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
237 
238 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
239 #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
240 
241 #include <inttypes.h>
242 #include <stdio.h>
243 
244 
245 #ifdef LV_HAVE_AVX2
246 #include <immintrin.h>
247 
248 static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
249  const lv_8sc_t* complexVector,
250  unsigned int num_points)
251 {
252  unsigned int number = 0;
253  const int8_t* complexVectorPtr = (int8_t*)complexVector;
254  int16_t* iBufferPtr = iBuffer;
255  __m256i moveMask = _mm256_set_epi8(0x80,
256  0x80,
257  0x80,
258  0x80,
259  0x80,
260  0x80,
261  0x80,
262  0x80,
263  14,
264  12,
265  10,
266  8,
267  6,
268  4,
269  2,
270  0,
271  0x80,
272  0x80,
273  0x80,
274  0x80,
275  0x80,
276  0x80,
277  0x80,
278  0x80,
279  14,
280  12,
281  10,
282  8,
283  6,
284  4,
285  2,
286  0);
287  __m256i complexVal, outputVal;
288  __m128i outputVal0;
289 
290  unsigned int sixteenthPoints = num_points / 16;
291 
292  for (number = 0; number < sixteenthPoints; number++) {
293  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
294  complexVectorPtr += 32;
295 
296  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
297  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
298 
299  outputVal0 = _mm256_extractf128_si256(complexVal, 0);
300 
301  outputVal = _mm256_cvtepi8_epi16(outputVal0);
302  outputVal = _mm256_slli_epi16(outputVal, 7);
303 
304  _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
305 
306  iBufferPtr += 16;
307  }
308 
309  number = sixteenthPoints * 16;
310  for (; number < num_points; number++) {
311  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
312  complexVectorPtr++;
313  }
314 }
315 #endif /* LV_HAVE_AVX2 */
316 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
volk_8ic_deinterleave_real_16i_a_avx
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:173
volk_8ic_deinterleave_real_16i_generic
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:221
lv_8sc_t
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:66