Crypto++
8.3
Free C++ class library of cryptographic schemes
|
Support functions for PowerPC and vector operations. More...
Go to the source code of this file.
Macros | |
#define | CONST_V8_CAST(x) ((unsigned char*)(x)) |
Cast array to vector pointer. More... | |
#define | CONST_V32_CAST(x) ((unsigned int*)(x)) |
Cast array to vector pointer. More... | |
#define | CONST_V64_CAST(x) ((unsigned long long*)(x)) |
Cast array to vector pointer. More... | |
#define | NCONST_V8_CAST(x) ((unsigned char*)(x)) |
Cast array to vector pointer. More... | |
#define | NCONST_V32_CAST(x) ((unsigned int*)(x)) |
Cast array to vector pointer. More... | |
#define | NCONST_V64_CAST(x) ((unsigned long long*)(x)) |
Cast array to vector pointer. More... | |
Typedefs | |
typedef __vector unsigned char | uint8x16_p |
Vector of 8-bit elements. More... | |
typedef __vector unsigned short | uint16x8_p |
Vector of 16-bit elements. More... | |
typedef __vector unsigned int | uint32x4_p |
Vector of 32-bit elements. More... | |
typedef __vector unsigned long long | uint64x2_p |
Vector of 64-bit elements. More... | |
Functions | |
uint32x4_p | VecZero () |
The 0 vector. More... | |
uint32x4_p | VecOne () |
The 1 vector. More... | |
template<class T > | |
T | VecReverse (const T data) |
Reverse bytes in a vector. More... | |
template<class T > | |
T | VecReverseLE (const T data) |
Reverse bytes in a vector. More... | |
template<class T > | |
T | VecReverseBE (const T data) |
Reverse bytes in a vector. More... | |
LOAD OPERATIONS | |
uint32x4_p | VecLoad_ALTIVEC (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad_ALTIVEC (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoad (const word32 src[4]) |
Loads a vector from a word array. More... | |
uint32x4_p | VecLoad (int off, const word32 src[4]) |
Loads a vector from a word array. More... | |
uint64x2_p | VecLoad (const word64 src[2]) |
Loads a vector from a double word array. More... | |
uint64x2_p | VecLoad (int off, const word64 src[2]) |
Loads a vector from a double word array. More... | |
uint32x4_p | VecLoadAligned (const byte src[16]) |
Loads a vector from an aligned byte array. More... | |
uint32x4_p | VecLoadAligned (int off, const byte src[16]) |
Loads a vector from an aligned byte array. More... | |
uint32x4_p | VecLoadAligned (const word32 src[4]) |
Loads a vector from an aligned word array. More... | |
uint32x4_p | VecLoadAligned (int off, const word32 src[4]) |
Loads a vector from an aligned word array. More... | |
uint64x2_p | VecLoadAligned (const word64 src[4]) |
Loads a vector from an aligned double word array. More... | |
uint64x2_p | VecLoadAligned (int off, const word64 src[4]) |
Loads a vector from an aligned double word array. More... | |
uint32x4_p | VecLoadBE (const byte src[16]) |
Loads a vector from a byte array. More... | |
uint32x4_p | VecLoadBE (int off, const byte src[16]) |
Loads a vector from a byte array. More... | |
STORE OPERATIONS | |
template<class T > | |
void | VecStore_ALTIVEC (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore_ALTIVEC (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStore (const T data, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, int off, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, word64 dest[2]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStore (const T data, int off, word64 dest[2]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreAligned (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreAligned (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreAligned (const T data, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreAligned (const T data, int off, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreBE (const T data, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreBE (const T data, int off, byte dest[16]) |
Stores a vector to a byte array. More... | |
template<class T > | |
void | VecStoreBE (const T data, word32 dest[4]) |
Stores a vector to a word array. More... | |
template<class T > | |
void | VecStoreBE (const T data, int off, word32 dest[4]) |
Stores a vector to a word array. More... | |
LOGICAL OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecAnd (const T1 vec1, const T2 vec2) |
AND two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecOr (const T1 vec1, const T2 vec2) |
OR two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecXor (const T1 vec1, const T2 vec2) |
XOR two vectors. More... | |
ARITHMETIC OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecAdd (const T1 vec1, const T2 vec2) |
Add two vectors. More... | |
template<class T1 , class T2 > | |
T1 | VecSub (const T1 vec1, const T2 vec2) |
Subtract two vectors. More... | |
PERMUTE OPERATIONS | |
template<class T1 , class T2 > | |
T1 | VecPermute (const T1 vec, const T2 mask) |
Permutes a vector. More... | |
template<class T1 , class T2 > | |
T1 | VecPermute (const T1 vec1, const T1 vec2, const T2 mask) |
Permutes two vectors. More... | |
SHIFT AND ROTATE OPERATIONS | |
template<unsigned int C, class T > | |
T | VecShiftLeftOctet (const T vec) |
Shift a vector left. More... | |
template<unsigned int C, class T > | |
T | VecShiftRightOctet (const T vec) |
Shift a vector right. More... | |
template<unsigned int C, class T > | |
T | VecRotateLeftOctet (const T vec) |
Rotate a vector left. More... | |
template<unsigned int C, class T > | |
T | VecRotateRightOctet (const T vec) |
Rotate a vector right. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateLeft (const uint32x4_p vec) |
Rotate a vector left. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateRight (const uint32x4_p vec) |
Rotate a vector right. More... | |
template<unsigned int C> | |
uint32x4_p | VecShiftLeft (const uint32x4_p vec) |
Shift a vector left. More... | |
template<unsigned int C> | |
uint32x4_p | VecShiftRight (const uint32x4_p vec) |
Shift a vector right. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateLeft (const uint64x2_p vec) |
Rotate a vector left. More... | |
template<unsigned int C> | |
uint64x2_p | VecShiftLeft (const uint64x2_p vec) |
Shift a vector left. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateRight (const uint64x2_p vec) |
Rotate a vector right. More... | |
template<unsigned int C> | |
uint64x2_p | VecShiftRight (const uint64x2_p vec) |
Shift a vector right. More... | |
OTHER OPERATIONS | |
template<class T > | |
T | VecMergeLow (const T vec1, const T vec2) |
Merge two vectors. More... | |
template<class T > | |
T | VecMergeHigh (const T vec1, const T vec2) |
Merge two vectors. More... | |
uint32x4_p | VecSplatWord (word32 val) |
Broadcast 32-bit word to a vector. More... | |
template<unsigned int N> | |
uint32x4_p | VecSplatElement (const uint32x4_p val) |
Broadcast 32-bit element to a vector. More... | |
uint64x2_p | VecSplatWord (word64 val) |
Broadcast 64-bit double word to a vector. More... | |
template<unsigned int N> | |
uint64x2_p | VecSplatElement (const uint64x2_p val) |
Broadcast 64-bit element to a vector. More... | |
template<class T > | |
T | VecGetLow (const T val) |
Extract a dword from a vector. More... | |
template<class T > | |
T | VecGetHigh (const T val) |
Extract a dword from a vector. More... | |
template<class T > | |
T | VecSwapWords (const T vec) |
Exchange high and low double words. More... | |
COMPARISON | |
template<class T1 , class T2 > | |
bool | VecEqual (const T1 vec1, const T2 vec2) |
Compare two vectors. More... | |
template<class T1 , class T2 > | |
bool | VecNotEqual (const T1 vec1, const T2 vec2) |
Compare two vectors. More... | |
32-BIT ALTIVEC | |
uint32x4_p | VecAdd64 (const uint32x4_p &vec1, const uint32x4_p &vec2) |
Add two vectors as if uint64x2_p. More... | |
uint64x2_p | VecAdd64 (const uint64x2_p &vec1, const uint64x2_p &vec2) |
Add two vectors as if uint64x2_p. More... | |
uint32x4_p | VecSub64 (const uint32x4_p &vec1, const uint32x4_p &vec2) |
Subtract two vectors as if uint64x2_p. More... | |
uint64x2_p | VecSub64 (const uint64x2_p &vec1, const uint64x2_p &vec2) |
Subtract two vectors as if uint64x2_p. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateLeft64 (const uint32x4_p vec) |
Rotate a vector left as if uint64x2_p. More... | |
template<> | |
uint32x4_p | VecRotateLeft64< 8 > (const uint32x4_p vec) |
Rotate a vector left as if uint64x2_p. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateLeft64 (const uint64x2_p vec) |
Rotate a vector left as if uint64x2_p. More... | |
template<unsigned int C> | |
uint32x4_p | VecRotateRight64 (const uint32x4_p vec) |
Rotate a vector right as if uint64x2_p. More... | |
template<> | |
uint32x4_p | VecRotateRight64< 8 > (const uint32x4_p vec) |
Rotate a vector right as if uint64x2_p. More... | |
template<unsigned int C> | |
uint64x2_p | VecRotateRight64 (const uint64x2_p vec) |
Rotate a vector right as if uint64x2_p. More... | |
template<class T1 , class T2 > | |
T1 | VecAnd64 (const T1 vec1, const T2 vec2) |
AND two vectors as if uint64x2_p. More... | |
template<class T1 , class T2 > | |
T1 | VecOr64 (const T1 vec1, const T2 vec2) |
OR two vectors as if uint64x2_p. More... | |
template<class T1 , class T2 > | |
T1 | VecXor64 (const T1 vec1, const T2 vec2) |
XOR two vectors as if uint64x2_p. More... | |
uint32x4_p | VecSplatWord64 (word64 val) |
Broadcast 64-bit double word to a vector. More... | |
template<unsigned int N> | |
uint32x4_p | VecSplatElement64 (const uint32x4_p val) |
Broadcast 64-bit element to a vector as if uint64x2_p. More... | |
template<unsigned int N> | |
uint64x2_p | VecSplatElement64 (const uint64x2_p val) |
Broadcast 64-bit element to a vector. More... | |
POLYNOMIAL MULTIPLICATION | |
uint32x4_p | VecPolyMultiply (const uint32x4_p &a, const uint32x4_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecPolyMultiply (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecIntelMultiply00 (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecIntelMultiply01 (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecIntelMultiply10 (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
uint64x2_p | VecIntelMultiply11 (const uint64x2_p &a, const uint64x2_p &b) |
Polynomial multiplication. More... | |
AES ENCRYPTION | |
template<class T1 , class T2 > | |
T1 | VecEncrypt (const T1 state, const T2 key) |
One round of AES encryption. More... | |
template<class T1 , class T2 > | |
T1 | VecEncryptLast (const T1 state, const T2 key) |
Final round of AES encryption. More... | |
template<class T1 , class T2 > | |
T1 | VecDecrypt (const T1 state, const T2 key) |
One round of AES decryption. More... | |
template<class T1 , class T2 > | |
T1 | VecDecryptLast (const T1 state, const T2 key) |
Final round of AES decryption. More... | |
SHA DIGESTS | |
template<int func, int fmask, class T > | |
T | VecSHA256 (const T data) |
SHA256 Sigma functions. More... | |
template<int func, int fmask, class T > | |
T | VecSHA512 (const T data) |
SHA512 Sigma functions. More... | |
Support functions for PowerPC and vector operations.
This header provides an agnostic interface into Clang, GCC and IBM XL C/C++ compilers modulo their different built-in functions for accessing vector instructions. The abstractions are necessary to support back to GCC 4.8 and XLC 11 and 12. GCC 4.8 and 4.9 are still popular, and they are the default compilers for GCC112, GCC119 and others on the compile farm. Older IBM XL C/C++ compilers also have the need due to lack of vec_xl and vec_xst support on some platforms. Modern compilers provide the best support and don't need many of the hacks below. The library is tested with the following PowerPC machines and compilers. GCC110, GCC111, GCC112, GCC119 and GCC135 are provided by the GCC Compile Farm.
late LLVM Clang (traditional Clang compiler)
The LLVM front-end makes it tricky to write portable code because LLVM pretends to be other compilers but cannot consume other compilers' builtins. When using XLC with -qxlcompatmacros the compiler pretends to be GCC, Clang and XLC all at once but it can only consume its own variety of builtins.
At Crypto++ 8.0 the various Vector{FuncName} were renamed to Vec{FuncName}. For example, VectorAnd was changed to VecAnd. The name change helped consolidate two slightly different implementations.
At Crypto++ 8.3 the library added select 64-bit functions for 32-bit Altivec. For example, VecAdd64 and VecSub64 take 32-bit vectors and add or subtract them as if they were vectors with two 64-bit elements. The functions dramatically improve performance for some algorithms on some platforms, like SIMON128 and SPECK128 on Power6 and earlier. For example, SPECK128 improved from 70 cpb to 10 cpb on an old PowerMac. Use the functions as shown below.
#if defined(_ARCH_PWR8)
# define speck128_t uint64x2_p
#else
# define speck128_t uint32x4_p
#endif

speck128_t rk, x1, x2, y1, y2;
rk = (speck128_t)VecLoadAligned(ptr);
x1 = VecRotateRight64<8>(x1);
x1 = VecAdd64(x1, y1);
...
Definition in file ppc_simd.h.
#define CONST_V8_CAST | ( | x | ) | ((unsigned char*)(x)) |
Cast array to vector pointer.
CONST_V8_CAST casts a const array to a vector pointer for a byte array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 141 of file ppc_simd.h.
#define CONST_V32_CAST | ( | x | ) | ((unsigned int*)(x)) |
Cast array to vector pointer.
CONST_V32_CAST casts a const array to a vector pointer for a word array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 147 of file ppc_simd.h.
#define CONST_V64_CAST | ( | x | ) | ((unsigned long long*)(x)) |
Cast array to vector pointer.
CONST_V64_CAST casts a const array to a vector pointer for a double word array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 153 of file ppc_simd.h.
#define NCONST_V8_CAST | ( | x | ) | ((unsigned char*)(x)) |
Cast array to vector pointer.
NCONST_V8_CAST casts an array to a vector pointer for a byte array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 159 of file ppc_simd.h.
#define NCONST_V32_CAST | ( | x | ) | ((unsigned int*)(x)) |
Cast array to vector pointer.
NCONST_V32_CAST casts an array to a vector pointer for a word array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 165 of file ppc_simd.h.
#define NCONST_V64_CAST | ( | x | ) | ((unsigned long long*)(x)) |
Cast array to vector pointer.
NCONST_V64_CAST casts an array to a vector pointer for a double word array. The Power ABI says source arrays are non-const, so this define removes the const. XLC++ will fail the compile if the source array is const.
Definition at line 171 of file ppc_simd.h.
typedef __vector unsigned char uint8x16_p |
Vector of 8-bit elements.
Definition at line 188 of file ppc_simd.h.
typedef __vector unsigned short uint16x8_p |
Vector of 16-bit elements.
Definition at line 193 of file ppc_simd.h.
typedef __vector unsigned int uint32x4_p |
Vector of 32-bit elements.
Definition at line 198 of file ppc_simd.h.
typedef __vector unsigned long long uint64x2_p |
Vector of 64-bit elements.
uint64x2_p is available on POWER7 with VSX and above. Most supporting functions, like 64-bit vec_add (vaddudm) and vec_sub (vsubudm), did not arrive until POWER8.
Definition at line 208 of file ppc_simd.h.
|
inline |
The 0 vector.
Definition at line 214 of file ppc_simd.h.
|
inline |
The 1 vector.
Definition at line 223 of file ppc_simd.h.
|
inline |
Reverse bytes in a vector.
T | vector type |
data | the vector |
Definition at line 238 of file ppc_simd.h.
|
inline |
Reverse bytes in a vector.
T | vector type |
data | the vector |
Definition at line 259 of file ppc_simd.h.
|
inline |
Reverse bytes in a vector.
T | vector type |
data | the vector |
Definition at line 279 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array Loads a vector in native endian format from a byte array. VecLoad_ALTIVEC() uses vec_ld if the effective address of src is aligned. If unaligned it uses vec_lvsl, vec_ld, vec_perm and src. The fixups using vec_lvsl and vec_perm are relatively expensive so you should provide aligned memory addresses. |
Definition at line 304 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the src byte array Loads a vector in native endian format from a byte array. VecLoad_ALTIVEC() uses vec_ld if the effective address of src is aligned. If unaligned it uses vec_lvsl, vec_ld, vec_perm and src. The fixups using vec_lvsl and vec_perm are relatively expensive so you should provide aligned memory addresses. |
Definition at line 335 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array VecLoad() loads a vector from a byte array. VecLoad() uses POWER9's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER9 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 365 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the src byte array VecLoad() loads a vector from a byte array. VecLoad() uses POWER9's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER9 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 395 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array VecLoad() loads a vector from a word array. VecLoad() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 424 of file ppc_simd.h.
|
inline |
Loads a vector from a word array.
src | the word array |
off | offset into the word array VecLoad() loads a vector from a word array. VecLoad() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 456 of file ppc_simd.h.
|
inline |
Loads a vector from a double word array.
src | the double word array VecLoad() loads a vector from a double word array. VecLoad() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 and VSX are not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. VecLoad() with 64-bit elements is available on POWER7 and above. |
Definition at line 490 of file ppc_simd.h.
|
inline |
Loads a vector from a double word array.
src | the double word array |
off | offset into the double word array VecLoad() loads a vector from a double word array. VecLoad() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 and VSX are not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. VecLoad() with 64-bit elements is available on POWER8 and above. |
Definition at line 524 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned byte array.
src | the byte array VecLoadAligned() loads a vector from an aligned byte array. VecLoadAligned() uses POWER9's vec_xl if available. vec_ld is used if POWER9 is not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 556 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned byte array.
src | the byte array |
off | offset into the src byte array VecLoadAligned() loads a vector from an aligned byte array. VecLoadAligned() uses POWER9's vec_xl if available. vec_ld is used if POWER9 is not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 584 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned word array.
src | the word array VecLoadAligned() loads a vector from an aligned word array. VecLoadAligned() uses POWER7's and VSX's vec_xl if available. vec_ld is used if POWER7 or VSX are not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 611 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned word array.
src | the word array |
off | offset into the src word array VecLoadAligned() loads a vector from an aligned word array. VecLoadAligned() uses POWER7's and VSX's vec_xl if available. vec_ld is used if POWER7 or VSX are not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 641 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned double word array.
src | the double word array VecLoadAligned() loads a vector from an aligned double word array. VecLoadAligned() uses POWER7's and VSX's vec_xl if available. vec_ld is used if POWER7 or VSX are not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 672 of file ppc_simd.h.
|
inline |
Loads a vector from an aligned double word array.
src | the double word array |
off | offset into the src double word array VecLoadAligned() loads a vector from an aligned double word array. VecLoadAligned() uses POWER7's and VSX's vec_xl if available. vec_ld is used if POWER7 or VSX are not available. The effective address of src must be 16-byte aligned for Altivec. |
Definition at line 703 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array VecLoadBE() loads a vector from a byte array. VecLoadBE will reverse all bytes in the array on a little endian system. VecLoadBE() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 or VSX are not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 738 of file ppc_simd.h.
|
inline |
Loads a vector from a byte array.
src | the byte array |
off | offset into the src byte array VecLoadBE() loads a vector from a byte array. VecLoadBE will reverse all bytes in the array on a little endian system. VecLoadBE() uses POWER7's and VSX's vec_xl if available. The instruction does not require aligned effective memory addresses. VecLoad_ALTIVEC() is used if POWER7 is not available. VecLoad_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 772 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array VecStore_ALTIVEC() stores a vector to a byte array. VecStore_ALTIVEC() uses vec_st if the effective address of dest is aligned, and uses vec_ste otherwise. vec_ste is relatively expensive so you should provide aligned memory addresses. VecStore_ALTIVEC() is used when POWER7 or above and unaligned loads are not available. |
Definition at line 813 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | offset into the dest byte array |
dest | the byte array VecStore_ALTIVEC() stores a vector to a byte array. VecStore_ALTIVEC() uses vec_st if the effective address of dest is aligned, and uses vec_ste otherwise. vec_ste is relatively expensive so you should provide aligned memory addresses. VecStore_ALTIVEC() is used when POWER7 or above and unaligned loads are not available. |
Definition at line 853 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array VecStore() stores a vector to a byte array. VecStore() uses POWER9's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER9 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 891 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | offset into the dest byte array |
dest | the byte array VecStore() stores a vector to a byte array. VecStore() uses POWER9's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER9 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 924 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array VecStore() stores a vector to a word array. VecStore() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 or VSX are not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 956 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | offset into the dest word array |
dest | the word array VecStore() stores a vector to a word array. VecStore() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 or VSX are not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 991 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array VecStore() stores a vector to a word array. VecStore() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 or VSX are not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. VecStore() with 64-bit elements is available on POWER8 and above. |
Definition at line 1026 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | offset into the dest word array |
dest | the word array VecStore() stores a vector to a word array. VecStore() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 or VSX are not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. VecStore() with 64-bit elements is available on POWER8 and above. |
Definition at line 1063 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array VecStoreAligned() stores a vector to an aligned byte array. VecStoreAligned() uses POWER9's vec_xst if available. vec_st is used if POWER9 is not available. The effective address of dest must be 16-byte aligned for Altivec. |
Definition at line 1096 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | offset into the dest byte array |
dest | the byte array VecStoreAligned() stores a vector to an aligned byte array. VecStoreAligned() uses POWER9's vec_xst if available. vec_st is used if POWER9 is not available. The effective address of dest must be 16-byte aligned for Altivec. |
Definition at line 1127 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array VecStoreAligned() stores a vector to an aligned word array. VecStoreAligned() uses POWER9's vec_xst if available. POWER7 vec_xst is used if POWER9 is not available. vec_st is used if POWER7 is not available. The effective address of dest must be 16-byte aligned for Altivec. |
Definition at line 1158 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | offset into the dest word array |
dest | the word array VecStoreAligned() stores a vector to an aligned word array. VecStoreAligned() uses POWER9's vec_xst if available. POWER7 vec_xst is used if POWER9 is not available. vec_st is used if POWER7 is not available. The effective address of dest must be 16-byte aligned for Altivec. |
Definition at line 1192 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
dest | the byte array VecStoreBE() stores a vector to a byte array. VecStoreBE will reverse all bytes in the array on a little endian system. VecStoreBE() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 1227 of file ppc_simd.h.
|
inline |
Stores a vector to a byte array.
T | vector type |
data | the vector |
off | offset into the dest byte array |
dest | the byte array VecStoreBE() stores a vector to a byte array. VecStoreBE will reverse all bytes in the array on a little endian system. VecStoreBE() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 1263 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
dest | the word array VecStoreBE() stores a vector to a word array. VecStoreBE will reverse all bytes in the array on a little endian system. VecStoreBE() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 1298 of file ppc_simd.h.
|
inline |
Stores a vector to a word array.
T | vector type |
data | the vector |
off | offset into the dest word array |
dest | the word array VecStoreBE() stores a vector to a word array. VecStoreBE will reverse all bytes in the array on a little endian system. VecStoreBE() uses POWER7's and VSX's vec_xst if available. The instruction does not require aligned effective memory addresses. VecStore_ALTIVEC() is used if POWER7 is not available. VecStore_ALTIVEC() can be relatively expensive if extra instructions are required to fix up unaligned memory addresses. |
Definition at line 1334 of file ppc_simd.h.
|
inline |
AND two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAnd() performs vec1 & vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 1372 of file ppc_simd.h.
|
inline |
OR two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecOr() performs vec1 | vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 1391 of file ppc_simd.h.
|
inline |
XOR two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecXor() performs vec1 ^ vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 1410 of file ppc_simd.h.
|
inline |
Add two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAdd() performs vec1 + vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1.
Definition at line 1434 of file ppc_simd.h.
|
inline |
Subtract two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector VecSub() performs vec1 - vec2 . vec2 is cast to the same type as vec1. The return vector is the same type as vec1. |
Definition at line 1452 of file ppc_simd.h.
|
inline |
Permutes a vector.
T1 | vector type |
T2 | vector type |
vec | the vector |
mask | vector mask |
Definition at line 1474 of file ppc_simd.h.
|
inline |
Permutes two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
mask | vector mask |
Definition at line 1492 of file ppc_simd.h.
|
inline |
Shift a vector left.
C | shift byte count |
T | vector type |
vec | the vector |
On big endian machines VecShiftLeftOctet() is vec_sld(a, z, c). On little endian machines VecShiftLeftOctet() is translated to vec_sld(z, a, 16-c). You should always call the function as if on a big endian machine as shown below.
    uint8x16_p x = VecLoad(ptr);
    uint8x16_p y = VecShiftLeftOctet<12>(x);
Definition at line 1524 of file ppc_simd.h.
|
inline |
Shift a vector right.
C | shift byte count |
T | vector type |
vec | the vector |
On big endian machines VecShiftRightOctet() is vec_sld(a, z, c). On little endian machines VecShiftRightOctet() is translated to vec_sld(z, a, 16-c). You should always call the function as if on a big endian machine as shown below.
    uint8x16_p x = VecLoad(ptr);
    uint8x16_p y = VecShiftRightOctet<12>(x);
Definition at line 1571 of file ppc_simd.h.
|
inline |
Rotate a vector left.
C | shift byte count |
T | vector type |
vec | the vector |
Definition at line 1610 of file ppc_simd.h.
|
inline |
Rotate a vector right.
C | shift byte count |
T | vector type |
vec | the vector |
Definition at line 1635 of file ppc_simd.h.
|
inline |
Rotate a vector left.
C | rotate bit count |
vec | the vector |
Definition at line 1656 of file ppc_simd.h.
|
inline |
Rotate a vector right.
C | rotate bit count |
vec | the vector |
Definition at line 1672 of file ppc_simd.h.
|
inline |
Shift a vector left.
C | shift bit count |
vec | the vector |
Definition at line 1688 of file ppc_simd.h.
|
inline |
Shift a vector right.
C | shift bit count |
vec | the vector |
Definition at line 1704 of file ppc_simd.h.
|
inline |
Rotate a vector left.
C | rotate bit count |
vec | the vector |
Definition at line 1725 of file ppc_simd.h.
|
inline |
Shift a vector left.
C | shift bit count |
vec | the vector |
Definition at line 1743 of file ppc_simd.h.
|
inline |
Rotate a vector right.
C | rotate bit count |
vec | the vector |
Definition at line 1761 of file ppc_simd.h.
|
inline |
Shift a vector right.
C | shift bit count |
vec | the vector |
Definition at line 1779 of file ppc_simd.h.
|
inline |
Merge two vectors.
T | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1801 of file ppc_simd.h.
|
inline |
Merge two vectors.
T | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1815 of file ppc_simd.h.
|
inline |
Broadcast 32-bit word to a vector.
val | the 32-bit value |
Definition at line 1826 of file ppc_simd.h.
|
inline |
Broadcast 32-bit element to a vector.
the | element number |
val | the 32-bit value |
Definition at line 1848 of file ppc_simd.h.
|
inline |
Broadcast 64-bit double word to a vector.
val | the 64-bit value |
Definition at line 1860 of file ppc_simd.h.
|
inline |
Broadcast 64-bit element to a vector.
the | element number |
val | the 64-bit value |
Definition at line 1874 of file ppc_simd.h.
|
inline |
Extract a dword from a vector.
T | vector type |
val | the vector |
Definition at line 1906 of file ppc_simd.h.
|
inline |
Extract a dword from a vector.
T | vector type |
val | the vector |
Definition at line 1928 of file ppc_simd.h.
|
inline |
Exchange high and low double words.
T | vector type |
vec | the vector |
Definition at line 1946 of file ppc_simd.h.
|
inline |
Compare two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1968 of file ppc_simd.h.
|
inline |
Compare two vectors.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
Definition at line 1985 of file ppc_simd.h.
|
inline |
Add two vectors as if uint64x2_p.
vec1 | the first vector |
vec2 | the second vector |
VecAdd64() performs vec1 + vec2. VecAdd64() performs as if adding two uint64x2_p vectors. On POWER7 and below VecAdd64() manages the carries from the elements.
Definition at line 2007 of file ppc_simd.h.
|
inline |
Add two vectors as if uint64x2_p.
vec1 | the first vector |
vec2 | the second vector |
VecAdd64() performs vec1 + vec2. VecAdd64() performs as if adding two uint64x2_p vectors. On POWER7 and below VecAdd64() manages the carries from the elements.
Definition at line 2043 of file ppc_simd.h.
|
inline |
Subtract two vectors as if uint64x2_p.
vec1 | the first vector |
vec2 | the second vector VecSub64() performs vec1 - vec2 . VecSub64() performs as if subtracting two uint64x2_p vectors. On POWER7 and below VecSub64() manages the borrows from the elements. |
Definition at line 2070 of file ppc_simd.h.
|
inline |
Subtract two vectors as if uint64x2_p.
vec1 | the first vector |
vec2 | the second vector VecSub64() performs vec1 - vec2 . VecSub64() performs as if subtracting two uint64x2_p vectors. On POWER7 and below VecSub64() manages the borrows from the elements. |
Definition at line 2107 of file ppc_simd.h.
|
inline |
Rotate a vector left as if uint64x2_p.
C | rotate bit count |
vec | the vector |
Definition at line 2135 of file ppc_simd.h.
|
inline |
Rotate a vector left as if uint64x2_p.
vec | the vector |
Definition at line 2184 of file ppc_simd.h.
|
inline |
Rotate a vector left as if uint64x2_p.
C | rotate bit count |
vec | the vector |
Definition at line 2206 of file ppc_simd.h.
|
inline |
Rotate a vector right as if uint64x2_p.
C | rotate bit count |
vec | the vector |
Definition at line 2233 of file ppc_simd.h.
|
inline |
Rotate a vector right as if uint64x2_p.
vec | the vector |
Definition at line 2283 of file ppc_simd.h.
|
inline |
Rotate a vector right as if uint64x2_p.
C | rotate bit count |
vec | the vector |
Definition at line 2305 of file ppc_simd.h.
|
inline |
AND two vectors as if uint64x2_p.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecAnd64() performs vec1 & vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1. VecAnd64() is a convenience function that simply performs a VecAnd().
Definition at line 2336 of file ppc_simd.h.
|
inline |
OR two vectors as if uint64x2_p.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecOr64() performs vec1 | vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1. VecOr64() is a convenience function that simply performs a VecOr().
Definition at line 2355 of file ppc_simd.h.
|
inline |
XOR two vectors as if uint64x2_p.
T1 | vector type |
T2 | vector type |
vec1 | the first vector |
vec2 | the second vector |
VecXor64() performs vec1 ^ vec2. vec2 is cast to the same type as vec1. The return vector is the same type as vec1. VecXor64() is a convenience function that simply performs a VecXor().
Definition at line 2374 of file ppc_simd.h.
|
inline |
Broadcast 64-bit double word to a vector.
val | the 64-bit value |
Definition at line 2385 of file ppc_simd.h.
|
inline |
Broadcast 64-bit element to a vector as if uint64x2_p.
the | element number |
val | the 64-bit value |
Definition at line 2404 of file ppc_simd.h.
|
inline |
Broadcast 64-bit element to a vector.
the | element number |
val | the 64-bit value |
Definition at line 2430 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The result is ah*bh XOR al*bl. This behavior differs from Intel polynomial multiplication. To obtain a single product without the XOR, set one of the high or low terms to 0. For example, setting ah=0 results in 0*bh XOR al*bl = al*bl.
Definition at line 2461 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The result is ah*bh XOR al*bl. This behavior differs from Intel polynomial multiplication. To obtain a single product without the XOR, set one of the high or low terms to 0. For example, setting ah=0 results in 0*bh XOR al*bl = al*bl.
Definition at line 2486 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The corresponding Intel operation is c = _mm_clmulepi64_si128(a, b, 0x00). The 0x00 indicates the low 64-bits of a and b are multiplied.
Definition at line 2510 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The corresponding Intel operation is c = _mm_clmulepi64_si128(a, b, 0x01). The 0x01 indicates the low 64-bits of a and high 64-bits of b are multiplied.
Definition at line 2532 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The corresponding Intel operation is c = _mm_clmulepi64_si128(a, b, 0x10). The 0x10 indicates the high 64-bits of a and low 64-bits of b are multiplied.
Definition at line 2554 of file ppc_simd.h.
|
inline |
Polynomial multiplication.
a | the first term |
b | the second term |
The corresponding Intel operation is c = _mm_clmulepi64_si128(a, b, 0x11). The 0x11 indicates the high 64-bits of a and b are multiplied.
Definition at line 2576 of file ppc_simd.h.
|
inline |
One round of AES encryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector VecEncrypt() performs one round of AES encryption of state using subkey key. The return vector is the same type as state. VecEncrypt() is available on POWER8 and above. |
Definition at line 2602 of file ppc_simd.h.
|
inline |
Final round of AES encryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector VecEncryptLast() performs the final round of AES encryption of state using subkey key. The return vector is the same type as state. VecEncryptLast() is available on POWER8 and above. |
Definition at line 2627 of file ppc_simd.h.
|
inline |
One round of AES decryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector VecDecrypt() performs one round of AES decryption of state using subkey key. The return vector is the same type as state. VecDecrypt() is available on POWER8 and above. |
Definition at line 2652 of file ppc_simd.h.
|
inline |
Final round of AES decryption.
T1 | vector type |
T2 | vector type |
state | the state vector |
key | the subkey vector VecDecryptLast() performs the final round of AES decryption of state using subkey key. The return vector is the same type as state. VecDecryptLast() is available on POWER8 and above. |
Definition at line 2677 of file ppc_simd.h.
|
inline |
SHA256 Sigma functions.
func | function |
fmask | function mask |
T | vector type |
data | the block to transform VecSHA256() selects sigma0, sigma1, Sigma0, Sigma1 based on func and fmask. The return vector is the same type as data. VecSHA256() is available on POWER8 and above. |
Definition at line 2707 of file ppc_simd.h.
|
inline |
SHA512 Sigma functions.
func | function |
fmask | function mask |
T | vector type |
data | the block to transform VecSHA512() selects sigma0, sigma1, Sigma0, Sigma1 based on func and fmask. The return vector is the same type as data. VecSHA512() is available on POWER8 and above. |
Definition at line 2732 of file ppc_simd.h.