libstdc++
simd_ppc.h
1 // Simd PowerPC specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #ifndef __ALTIVEC__
31 #error "simd_ppc.h may only be included when AltiVec/VMX is available"
32 #endif
33 
34 _GLIBCXX_SIMD_BEGIN_NAMESPACE
35 
36 // _SimdImplPpc {{{
37 template <typename _Abi>
38  struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
39  {
40  using _Base = _SimdImplBuiltin<_Abi>;
41 
42  // Byte and halfword shift instructions on PPC only consider the low 3 or 4
43  // bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more)
44  // is UB without extra measures. To match scalar behavior, byte and halfword
45  // shifts need an extra fixup step.
46 
47  // _S_bit_shift_left {{{
48  template <typename _Tp, size_t _Np>
49  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
50  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
51  {
52  __x = _Base::_S_bit_shift_left(__x, __y);
53  if constexpr (sizeof(_Tp) < sizeof(int))
54  __x._M_data
55  = (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data;
56  return __x;
57  }
58 
59  template <typename _Tp, size_t _Np>
60  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
61  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
62  {
63  __x = _Base::_S_bit_shift_left(__x, __y);
64  if constexpr (sizeof(_Tp) < sizeof(int))
65  {
66  if (__y >= sizeof(_Tp) * __CHAR_BIT__)
67  return {};
68  }
69  return __x;
70  }
71 
72  // }}}
73  // _S_bit_shift_right {{{
74  template <typename _Tp, size_t _Np>
75  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
76  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
77  {
78  if constexpr (sizeof(_Tp) < sizeof(int))
79  {
80  constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
81  if constexpr (is_unsigned_v<_Tp>)
82  return (__y._M_data < __nbits)
83  & _Base::_S_bit_shift_right(__x, __y)._M_data;
84  else
85  {
86  _Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data
87  >= __nbits),
88  __y, __nbits - 1);
89  return _Base::_S_bit_shift_right(__x, __y);
90  }
91  }
92  else
93  return _Base::_S_bit_shift_right(__x, __y);
94  }
95 
96  template <typename _Tp, size_t _Np>
97  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
98  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
99  {
100  if constexpr (sizeof(_Tp) < sizeof(int))
101  {
102  constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
103  if (__y >= __nbits)
104  {
105  if constexpr (is_unsigned_v<_Tp>)
106  return {};
107  else
108  return _Base::_S_bit_shift_right(__x, __nbits - 1);
109  }
110  }
111  return _Base::_S_bit_shift_right(__x, __y);
112  }
113 
114  // }}}
115  };
116 
117 // }}}
118 
119 _GLIBCXX_SIMD_END_NAMESPACE
120 #endif // __cplusplus >= 201703L
121 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
122 
123 // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80