7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
16 #ifndef CRYPTOPP_IMPORTS
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
48 #if (__SUNPRO_CC >= 0x5130)
50 # define MAYBE_UNCONST_CAST const_cast<word*>
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
72 bool AssignIntToInteger(
const std::type_info &valueType,
void *pInteger,
const void *pInt)
74 if (valueType !=
typeid(
Integer))
76 *
reinterpret_cast<Integer *
>(pInteger) = *
reinterpret_cast<const int *
>(pInt);
80 inline static int Compare(
const word *A,
const word *B,
size_t N)
91 inline static int Increment(word *A,
size_t N, word B=1)
98 for (
unsigned i=1; i<N; i++)
104 inline static int Decrement(word *A,
size_t N, word B=1)
111 for (
unsigned i=1; i<N; i++)
117 static void TwosComplement(word *A,
size_t N)
120 for (
unsigned i=0; i<N; i++)
124 static word AtomicInverseModPower2(word A)
130 for (
unsigned i=3; i<WORD_BITS; i*=2)
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
// Two-word emulation used in this preprocessor branch: a double-word value
// named x is held in two separate `word` variables, x0 and x1, whose names are
// formed by token pasting. LowWord/HighWord below fix x0 as the low half and
// x1 as the high half.
// Declares the pair x0, x1. The macro supplies its own trailing semicolon.
140 #define Declare2Words(x) word x##0, x##1;
// Loads the single word b into the pair a: low half = b, high half = 0.
141 #define AssignWord(a, b) a##0 = b; a##1 = 0;
// a = b + c, where a and b are pairs and c is a single word. (a0 < c) is the
// carry into the high half; this assumes `word` arithmetic wraps, i.e. `word`
// is an unsigned type (its definition is not visible here -- confirm).
142 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
// Low half of the pair.
143 #define LowWord(a) a##0
// High half of the pair.
144 #define HighWord(a) a##1
146 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147 #ifndef __INTEL_COMPILER
148 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
150 #elif defined(__DECCXX)
151 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152 #elif defined(__x86_64__)
153 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
155 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
157 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
// Full product of two words into the pair p: forwards to the selected
// MultiplyWordsLoHi, which receives p0 and p1 as its first two arguments.
165 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
// Doubles the three-word quantity made of c (lowest), d0 and d1 (highest):
// the top bit of each lower word is moved into the bottom bit of the next one.
167 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
// a += b for two pairs. (a0 < b0) after the low-half addition is the carry
// into the high half (assumes wrapping unsigned `word` arithmetic).
170 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
// One step of a carry chain. On entry u1 is read as the incoming carry; on
// exit u0 = a + b + carry-in and u1 = carry-out, computed as (t<a) + (u0<t).
172 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
// One step of a borrow chain. On entry u1 is read as the incoming borrow; on
// exit u0 = a - b - borrow-in and u1 = borrow-out, computed as (t>a) + (u0>t).
173 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
// In this representation both the carry and the borrow are kept in u1.
174 #define GetCarry(u) u##1
175 #define GetBorrow(u) u##1
// Native double-word variant of the two-word helper macros: here a "pair" is a
// single `dword` variable.
177 #define Declare2Words(x) dword x;
// For MSVC >= 1400 (excluding the Intel compiler and ARM targets) the product
// is taken with the __emulu intrinsic.
178 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179 #define MultiplyWords(p, a, b) p = __emulu(a, b);
// Alternative definition: a is cast to dword first so the multiplication is
// carried out in the wider type.
181 #define MultiplyWords(p, a, b) p = (dword)a*b;
// Loads the single word b into the double word a.
183 #define AssignWord(a, b) a = b;
// a = b + c, with a and b double words and c a single word.
184 #define Add2WordsBy1(a, b, c) a = b + c;
// a += b for two double words.
185 #define Acc2WordsBy2(a, b) a += b;
// Low half: truncating conversion to `word`.
186 #define LowWord(a) word(a)
// High half: the bits above WORD_BITS.
187 #define HighWord(a) word(a>>WORD_BITS)
// Doubles the quantity made of c (low word) and d (double word above it): the
// top bit of c is moved into the bottom bit of d.
188 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
// One step of a carry chain: the previous u's high half is read as carry-in
// (via GetCarry) and the new sum, including carry-out, is stored back into u.
189 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
// One step of a borrow chain: the previous u's top bit is read as borrow-in
// (via GetBorrow) and the new difference is stored back into u.
190 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
// Carry-out of AddWithCarry is the high half of u.
191 #define GetCarry(u) HighWord(u)
// Borrow-out of SubtractWithBorrow is bit (2*WORD_BITS - 1) of u.
// NOTE(review): this is the top bit only if dword is exactly 2*WORD_BITS wide -- confirm.
192 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
// Generic multiply-accumulate built from the helper macros: p = a*b, p += c,
// then c takes the low half of p and the high half of p is added to the
// double word d. Relies on a double-word variable named `p` being declared in
// the expansion scope (it is not a macro parameter).
195 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
// a += b, with a a double word and b a single word.
198 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
// Generic form: e += c; c = low half of e; e = d + high half of e.
// c is a single word; d and e are double words.
201 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
209 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
// Default constructor for Coverity / non-NDEBUG builds when a native dword is
// available (see the enclosing #if). m_whole is value-initialised and then
// every byte is overwritten with 0xaa, a repeating 1010 bit pattern, so that
// reads of a never-assigned DWord are conspicuous in debug builds.
// The fill byte was previously 0xa (00001010), which did not match the 0xaa
// used by the other debug-build default constructors in this file.
211 DWord() : m_whole(0) {memset(&m_whole, 0xaa,
sizeof(m_whole));}
212 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
// Default constructor for Coverity / non-NDEBUG builds when no native dword is
// available (see the enclosing #elif). m_halfs is value-initialised and then
// every byte is overwritten with 0xaa, so reads of a never-assigned DWord
// yield a recognisable non-zero pattern.
214 DWord() : m_halfs() {memset(&m_halfs, 0xaa,
sizeof(m_halfs));}
219 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
// Native-dword variant: constructs the double word from a single word by
// initialising m_whole with `low`. Marked explicit, so a word is never
// converted to a DWord implicitly.
220 explicit DWord(word low) : m_whole(low) {}
222 explicit DWord(word low)
229 DWord(word low, word high)
235 static DWord Multiply(word a, word b)
238 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
239 r.m_whole = (dword)a * b;
240 #elif defined(MultiplyWordsLoHi)
241 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
248 static DWord MultiplyAndAdd(word a, word b, word c)
250 DWord r = Multiply(a, b);
254 DWord & operator+=(word a)
256 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
257 m_whole = m_whole + a;
260 m_halfs.high += (m_halfs.low < a);
265 DWord operator+(word a)
268 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
269 r.m_whole = m_whole + a;
271 r.m_halfs.low = m_halfs.low + a;
272 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
280 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
281 r.m_whole = m_whole - a.m_whole;
283 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
284 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
289 DWord operator-(word a)
292 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
293 r.m_whole = m_whole - a;
295 r.m_halfs.low = m_halfs.low - a;
296 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
302 word operator/(word divisor);
304 word operator%(word a);
306 bool operator!()
const
308 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
311 return !m_halfs.high && !m_halfs.low;
// Returns the low word of the double word (m_halfs.low).
315 word GetLowHalf()
const {
return m_halfs.low;}
// Returns the high word of the double word (m_halfs.high).
316 word GetHighHalf()
const {
return m_halfs.high;}
// Returns 0 - m_halfs.high, i.e. the negated high word.
// NOTE(review): presumably meant for use after a subtraction that may have
// underflowed, where an all-ones high word would map to a borrow of 1 --
// confirm against the callers.
317 word GetHighHalfAsBorrow()
const {
return 0-m_halfs.high;}
322 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
327 #ifdef IS_LITTLE_ENDIAN
343 #if defined(__COVERITY__)
// Default constructor used when __COVERITY__ is defined: zero-initialises
// m_whole.
344 Word() : m_whole(0) {}
345 #elif !defined(NDEBUG)
// Default constructor for non-NDEBUG builds: m_whole is initialised and then
// every byte is overwritten with 0xaa, so reads of a never-assigned Word
// yield a recognisable non-zero pattern.
347 Word() : m_whole(0) {memset(&m_whole, 0xaa,
sizeof(m_whole));}
// Wraps a full word value. Not explicit, so a `word` converts to Word
// implicitly.
352 Word(word value) : m_whole(value) {}
// Builds a word from two half words: `low` occupies the bottom WORD_BITS/2
// bits and `high` is widened to `word` and shifted into the bits above them.
353 Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
355 static Word Multiply(hword a, hword b)
358 r.m_whole = (word)a * b;
365 r.m_whole = m_whole - a.m_whole;
369 Word operator-(hword a)
372 r.m_whole = m_whole - a;
377 hword operator/(hword divisor)
379 return hword(m_whole / divisor);
382 bool operator!()
const
// Returns the complete stored word.
387 word GetWhole()
const {
return m_whole;}
// Returns the low half word: m_whole converted (truncated) to hword.
388 hword GetLowHalf()
const {
return hword(m_whole);}
// Returns the high half word: m_whole shifted right by WORD_BITS/2, then
// converted to hword.
389 hword GetHighHalf()
const {
return hword(m_whole>>(WORD_BITS/2));}
// Returns 0 minus the high half word, converted back to hword.
// NOTE(review): presumably meant for use after a subtraction that may have
// underflowed, where an all-ones high half would map to a borrow of 1 --
// confirm against the callers.
390 hword GetHighHalfAsBorrow()
const {
return 0-hword(m_whole>>(WORD_BITS/2));}
397 template <
class S,
class D>
398 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
400 CRYPTOPP_UNUSED(dummy);
403 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
410 Q = D(A[1], A[2]) / S(B1+1);
412 Q = D(A[0], A[1]) / B0;
415 D p = D::Multiply(B0, Q);
416 D u = (D) A[0] - p.GetLowHalf();
417 A[0] = u.GetLowHalf();
418 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
419 A[1] = u.GetLowHalf();
420 A[2] += u.GetHighHalf();
423 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
426 A[0] = u.GetLowHalf();
427 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
428 A[1] = u.GetLowHalf();
429 A[2] += u.GetHighHalf();
438 template <
class S,
class D>
439 inline D DivideFourWordsByTwo(S *T,
const D &Al,
const D &Ah,
const D &B)
442 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
446 T[0] = Al.GetLowHalf();
447 T[1] = Al.GetHighHalf();
448 T[2] = Ah.GetLowHalf();
449 T[3] = Ah.GetHighHalf();
450 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
451 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
452 return D(Q[0], Q[1]);
457 inline word DWord::operator/(word a)
459 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
460 return word(m_whole / a);
463 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
467 inline word DWord::operator%(word a)
469 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
470 return word(m_whole % a);
472 if (a < (word(1) << (WORD_BITS/2)))
475 word r = m_halfs.high % h;
476 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
477 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
482 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
483 return Word(r[0], r[1]).GetWhole();
491 #if defined(__GNUC__)
492 #define AddPrologue \
494 __asm__ __volatile__ \
497 #define AddEpilogue \
500 : "d" (C), "a" (A), "D" (B), "c" (N) \
501 : "%esi", "memory", "cc" \
504 #define MulPrologue \
505 __asm__ __volatile__ \
510 #define MulEpilogue \
514 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
515 : "%esi", "memory", "cc" \
517 #define SquPrologue MulPrologue
518 #define SquEpilogue \
522 : "d" (s_maskLow16), "c" (C), "a" (A) \
523 : "%esi", "%edi", "memory", "cc" \
525 #define TopPrologue MulPrologue
526 #define TopEpilogue \
530 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
534 #define AddPrologue \
537 __asm mov eax, [esp+12] \
538 __asm mov edi, [esp+16]
539 #define AddEpilogue \
544 #define SaveEBX __asm push ebx
545 #define RestoreEBX __asm pop ebx
550 #define SquPrologue \
554 AS2( lea ebx, s_maskLow16)
555 #define MulPrologue \
560 AS2( lea ebx, s_maskLow16)
561 #define TopPrologue \
567 AS2( lea ebx, s_maskLow16)
568 #define SquEpilogue RestoreEBX
569 #define MulEpilogue RestoreEBX
570 #define TopEpilogue RestoreEBX
573 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Declaration only: in the CRYPTOPP_X64_MASM_AVAILABLE branch no body is given
// here. NOTE(review): the definition is presumably supplied by a separately
// assembled module -- confirm.
// Takes a word count N, an output array C and input arrays A and B. The C++
// fallback of the same name in this file returns the final carry as an int;
// this variant is presumably expected to honour the same contract.
575 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B);
// Declaration only: in the CRYPTOPP_X64_MASM_AVAILABLE branch no body is given
// here. NOTE(review): the definition is presumably supplied by a separately
// assembled module -- confirm.
// Takes a word count N, an output array C and input arrays A and B. The C++
// fallback of the same name in this file returns the final borrow as an int;
// this variant is presumably expected to honour the same contract.
576 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B);
578 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
579 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
587 AS2( mov %0,[%3+8*%1])
588 AS2( add %0,[%4+8*%1])
589 AS2( mov [%2+8*%1],%0)
591 AS2( mov %0,[%3+8*%1+8])
592 AS2( adc %0,[%4+8*%1+8])
593 AS2( mov [%2+8*%1+8],%0)
596 AS2( mov %0,[%3+8*%1])
597 AS2( adc %0,[%4+8*%1])
598 AS2( mov [%2+8*%1],%0)
604 :
"=&r" (result),
"+c" (N)
605 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
611 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
619 AS2( mov %0,[%3+8*%1])
620 AS2( sub %0,[%4+8*%1])
621 AS2( mov [%2+8*%1],%0)
623 AS2( mov %0,[%3+8*%1+8])
624 AS2( sbb %0,[%4+8*%1+8])
625 AS2( mov [%2+8*%1+8],%0)
628 AS2( mov %0,[%3+8*%1])
629 AS2( sbb %0,[%4+8*%1])
630 AS2( mov [%2+8*%1],%0)
636 :
"=&r" (result),
"+c" (N)
637 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
642 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
643 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
648 AS2( lea eax, [eax+4*ecx])
649 AS2( lea edi, [edi+4*ecx])
650 AS2( lea edx, [edx+4*ecx])
660 AS2( mov esi,[eax+4*ecx])
661 AS2( adc esi,[edi+4*ecx])
662 AS2( mov [edx+4*ecx],esi)
663 AS2( mov esi,[eax+4*ecx+4])
664 AS2( adc esi,[edi+4*ecx+4])
665 AS2( mov [edx+4*ecx+4],esi)
667 AS2( mov esi,[eax+4*ecx+8])
668 AS2( adc esi,[edi+4*ecx+8])
669 AS2( mov [edx+4*ecx+8],esi)
670 AS2( mov esi,[eax+4*ecx+12])
671 AS2( adc esi,[edi+4*ecx+12])
672 AS2( mov [edx+4*ecx+12],esi)
674 AS2( lea ecx,[ecx+4])
684 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C, const word *A, const word *B)
689 AS2( lea eax, [eax+4*ecx])
690 AS2( lea edi, [edi+4*ecx])
691 AS2( lea edx, [edx+4*ecx])
701 AS2( mov esi,[eax+4*ecx])
702 AS2( sbb esi,[edi+4*ecx])
703 AS2( mov [edx+4*ecx],esi)
704 AS2( mov esi,[eax+4*ecx+4])
705 AS2( sbb esi,[edi+4*ecx+4])
706 AS2( mov [edx+4*ecx+4],esi)
708 AS2( mov esi,[eax+4*ecx+8])
709 AS2( sbb esi,[edi+4*ecx+8])
710 AS2( mov [edx+4*ecx+8],esi)
711 AS2( mov esi,[eax+4*ecx+12])
712 AS2( sbb esi,[edi+4*ecx+12])
713 AS2( mov [edx+4*ecx+12],esi)
715 AS2( lea ecx,[ecx+4])
725 #if CRYPTOPP_INTEGER_SSE2
726 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Add(
size_t N, word *C,
const word *A,
const word *B)
731 AS2( lea eax, [eax+4*ecx])
732 AS2( lea edi, [edi+4*ecx])
733 AS2( lea edx, [edx+4*ecx])
744 AS2( movd mm0, DWORD PTR [eax+4*ecx])
745 AS2( movd mm1, DWORD PTR [edi+4*ecx])
748 AS2( movd DWORD PTR [edx+4*ecx], mm2)
751 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
752 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
755 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
759 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
760 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
763 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
766 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
767 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
770 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
782 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Sub(
size_t N, word *C, const word *A, const word *B)
787 AS2( lea eax, [eax+4*ecx])
788 AS2( lea edi, [edi+4*ecx])
789 AS2( lea edx, [edx+4*ecx])
800 AS2( movd mm0, DWORD PTR [eax+4*ecx])
801 AS2( movd mm1, DWORD PTR [edi+4*ecx])
804 AS2( movd DWORD PTR [edx+4*ecx], mm0)
807 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
808 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
811 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
815 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
816 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
819 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
822 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
823 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
826 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
838 #endif // #if CRYPTOPP_INTEGER_SSE2
840 int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
846 for (
size_t i=0; i<N; i+=2)
848 AddWithCarry(u, A[i], B[i]);
850 AddWithCarry(u, A[i+1], B[i+1]);
853 return int(GetCarry(u));
856 int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
862 for (
size_t i=0; i<N; i+=2)
864 SubtractWithBorrow(u, A[i], B[i]);
866 SubtractWithBorrow(u, A[i+1], B[i+1]);
869 return int(GetBorrow(u));
873 static word LinearMultiply(word *C,
const word *AA, word B,
size_t N)
876 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
879 for(
unsigned i=0; i<N; i++)
882 MultiplyWords(p, A[i], B);
883 Acc2WordsBy1(p, carry);
890 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
894 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
899 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
900 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
901 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
902 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
903 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
908 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
909 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
910 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
911 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
912 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
913 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
914 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
915 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
916 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
917 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
918 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
919 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
920 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
925 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
926 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
927 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
928 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
929 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
930 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
931 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
932 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
933 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
934 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
935 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
936 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
937 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
938 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
939 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
940 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
941 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
942 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
943 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
944 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
945 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
946 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
947 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
948 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
949 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
950 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
951 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
952 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
953 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
962 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
963 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
964 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
965 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
970 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
971 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
972 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
973 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
974 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
975 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
976 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
977 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
978 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
979 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
980 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
981 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
986 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
987 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
988 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
989 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
990 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
991 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
992 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
993 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
994 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
995 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
996 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
997 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
998 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
999 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1000 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1001 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1002 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1003 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1004 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1005 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1006 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1007 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1008 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1009 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1010 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1011 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1012 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1013 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1018 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1023 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1024 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1025 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1030 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1031 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1032 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1033 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1034 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1035 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1036 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1041 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1042 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1043 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1044 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1045 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1046 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1047 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1048 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1049 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1050 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1051 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1052 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1053 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1054 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1055 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1061 #define Mul_Begin(n) \
1065 MultiplyWords(p, A[0], B[0]) \
1066 AssignWord(c, LowWord(p)) \
1067 AssignWord(d, HighWord(p))
// Accumulates one partial product: p = A[i]*B[j], then the low word of p is
// added to the double word c and the high word of p to the double word d.
// Expects p, c, d, A and B to be in scope at the expansion site.
1069 #define Mul_Acc(i, j) \
1070 MultiplyWords(p, A[i], B[j]) \
1071 Acc2WordsBy1(c, LowWord(p)) \
1072 Acc2WordsBy1(d, HighWord(p))
// Finishes one result word and starts the next: stores the low word of c in
// R[k], sets c = d + (high word of c), then computes p = A[i]*B[j] and
// restarts d from the high word of p while adding the low word of p to c.
// Expects p, c, d, A, B and R to be in scope at the expansion site.
1074 #define Mul_SaveAcc(k, i, j) \
1075 R[k] = LowWord(c); \
1076 Add2WordsBy1(c, d, HighWord(c)) \
1077 MultiplyWords(p, A[i], B[j]) \
1078 AssignWord(d, HighWord(p)) \
1079 Acc2WordsBy1(c, LowWord(p))
// Writes the last three result words of an n-word by n-word product: the low
// word of c goes to R[2n-3]; the high word of c is added to d; the final
// product A[n-1]*B[n-1] is added to d; d's low and high words then become
// R[2n-2] and R[2n-1].
// Expects p, c, d, A, B and R to be in scope at the expansion site.
1081 #define Mul_End(n) \
1082 R[2*n-3] = LowWord(c); \
1083 Acc2WordsBy1(d, HighWord(c)) \
1084 MultiplyWords(p, A[n-1], B[n-1])\
1085 Acc2WordsBy2(d, p) \
1086 R[2*n-2] = LowWord(d); \
1087 R[2*n-1] = HighWord(d);
1089 #define Bot_SaveAcc(k, i, j) \
1090 R[k] = LowWord(c); \
1091 word e = LowWord(d) + HighWord(c); \
1094 #define Bot_Acc(i, j) \
1097 #define Bot_End(n) \
1100 #define Mul_Begin(n) \
1104 MultiplyWords(p, A[0], B[0]) \
1106 AssignWord(d, HighWord(p))
// Variant of Mul_Acc that delegates to MulAcc: accumulates A[i]*B[j] into the
// (c, d) accumulator. Expects c, d, A and B to be in scope at the expansion site.
1108 #define Mul_Acc(i, j) \
1109 MulAcc(c, d, A[i], B[j])
1111 #define Mul_SaveAcc(k, i, j) \
1114 AssignWord(d, HighWord(d)) \
1115 MulAcc(c, d, A[i], B[j])
1117 #define Mul_End(k, i) \
1119 MultiplyWords(p, A[i], B[i]) \
1120 Acc2WordsBy2(p, d) \
1121 R[k+1] = LowWord(p); \
1122 R[k+2] = HighWord(p);
1124 #define Bot_SaveAcc(k, i, j) \
1129 #define Bot_Acc(i, j) \
1132 #define Bot_End(n) \
1136 #define Squ_Begin(n) \
1141 MultiplyWords(p, A[0], A[0]) \
1142 R[0] = LowWord(p); \
1143 AssignWord(e, HighWord(p)) \
1144 MultiplyWords(p, A[0], A[1]) \
1146 AssignWord(d, HighWord(p)) \
1149 #define Squ_NonDiag \
1152 #define Squ_SaveAcc(k, i, j) \
1153 Acc3WordsBy2(c, d, e) \
1155 MultiplyWords(p, A[i], A[j]) \
1157 AssignWord(d, HighWord(p)) \
// Squaring counterpart of Mul_Acc: accumulates the product A[i]*A[j] of two
// words of the same operand into the (c, d) accumulator via MulAcc.
// Expects c, d and A to be in scope at the expansion site.
1159 #define Squ_Acc(i, j) \
1160 MulAcc(c, d, A[i], A[j])
1162 #define Squ_Diag(i) \
1164 MulAcc(c, d, A[i], A[i])
1166 #define Squ_End(n) \
1167 Acc3WordsBy2(c, d, e) \
1169 MultiplyWords(p, A[n-1], A[n-1])\
1170 Acc2WordsBy2(p, e) \
1171 R[2*n-2] = LowWord(p); \
1172 R[2*n-1] = HighWord(p);
1175 void Baseline_Multiply2(word *R,
const word *AA,
const word *BB)
1178 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1179 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1184 void Baseline_Multiply4(word *R,
const word *AA,
const word *BB)
1187 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1188 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1193 void Baseline_Multiply8(word *R,
const word *AA,
const word *BB)
1196 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1197 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1202 void Baseline_Square2(word *R,
const word *AA)
1205 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1210 void Baseline_Square4(word *R,
const word *AA)
1213 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1218 void Baseline_Square8(word *R,
const word *AA)
1221 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1226 void Baseline_MultiplyBottom2(word *R,
const word *AA,
const word *BB)
1229 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1230 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1235 void Baseline_MultiplyBottom4(word *R,
const word *AA,
const word *BB)
1238 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1239 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1244 void Baseline_MultiplyBottom8(word *R,
const word *AA,
const word *BB)
1247 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1248 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1253 #define Top_Begin(n) \
1257 MultiplyWords(p, A[0], B[n-2]);\
1258 AssignWord(d, HighWord(p));
1260 #define Top_Acc(i, j) \
1261 MultiplyWords(p, A[i], B[j]);\
1262 Acc2WordsBy1(d, HighWord(p));
1264 #define Top_SaveAcc0(i, j) \
1266 AssignWord(d, HighWord(d)) \
1267 MulAcc(c, d, A[i], B[j])
1269 #define Top_SaveAcc1(i, j) \
1271 Acc2WordsBy1(d, c); \
1273 AssignWord(d, HighWord(d)) \
1274 MulAcc(c, d, A[i], B[j])
1276 void Baseline_MultiplyTop2(word *R,
const word *A,
const word *B, word L)
1280 Baseline_Multiply2(T, A, B);
1285 void Baseline_MultiplyTop4(word *R,
const word *AA,
const word *BB, word L)
1288 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1289 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1292 Top_Acc(1, 1) Top_Acc(2, 0) \
1293 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1294 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1295 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1299 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1302 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1303 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1306 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1307 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1308 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1309 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1310 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1311 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1312 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1313 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1317 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1318 void Baseline_Multiply16(word *R,
const word *AA,
const word *BB)
1321 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1322 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1327 void Baseline_Square16(word *R,
const word *AA)
1330 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1335 void Baseline_MultiplyBottom16(word *R,
const word *AA,
const word *BB)
1338 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1339 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1344 void Baseline_MultiplyTop16(word *R,
const word *AA,
const word *BB, word L)
1347 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1348 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1351 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1352 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1353 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1354 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1355 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1356 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1357 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1358 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1359 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1360 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1361 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1362 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1363 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1364 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1365 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1366 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1373 #if CRYPTOPP_INTEGER_SSE2
1375 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
// SSE2_FinalSave(k): shifts each 64-bit lane of xmm5 left by 16 bits, adds the
// result into xmm4 using 64-bit lane adds, and stores the low quadword of xmm4
// to [ecx+8*(k)].
1393 #define SSE2_FinalSave(k) \
1394 AS2( psllq xmm5, 16) \
1395 AS2( paddq xmm4, xmm5) \
1396 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1398 #define SSE2_SaveShift(k) \
1399 AS2( movq xmm0, xmm6) \
1400 AS2( punpckhqdq xmm6, xmm0) \
1401 AS2( movq xmm1, xmm7) \
1402 AS2( punpckhqdq xmm7, xmm1) \
1403 AS2( paddd xmm6, xmm0) \
1404 AS2( pslldq xmm6, 4) \
1405 AS2( paddd xmm7, xmm1) \
1406 AS2( paddd xmm4, xmm6) \
1407 AS2( pslldq xmm7, 4) \
1408 AS2( movq xmm6, xmm4) \
1409 AS2( paddd xmm5, xmm7) \
1410 AS2( movq xmm7, xmm5) \
1411 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1412 AS2( psrlq xmm6, 16) \
1413 AS2( paddq xmm6, xmm7) \
1414 AS2( punpckhqdq xmm4, xmm0) \
1415 AS2( punpckhqdq xmm5, xmm0) \
1416 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1417 AS2( psrlq xmm6, 3*16) \
1418 AS2( paddd xmm4, xmm6) \
1420 #define Squ_SSE2_SaveShift(k) \
1421 AS2( movq xmm0, xmm6) \
1422 AS2( punpckhqdq xmm6, xmm0) \
1423 AS2( movq xmm1, xmm7) \
1424 AS2( punpckhqdq xmm7, xmm1) \
1425 AS2( paddd xmm6, xmm0) \
1426 AS2( pslldq xmm6, 4) \
1427 AS2( paddd xmm7, xmm1) \
1428 AS2( paddd xmm4, xmm6) \
1429 AS2( pslldq xmm7, 4) \
1430 AS2( movhlps xmm6, xmm4) \
1431 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1432 AS2( paddd xmm5, xmm7) \
1433 AS2( movhps QWORD PTR [esp+12], xmm5)\
1434 AS2( psrlq xmm4, 16) \
1435 AS2( paddq xmm4, xmm5) \
1436 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1437 AS2( psrlq xmm4, 3*16) \
1438 AS2( paddd xmm4, xmm6) \
1439 AS2( movq QWORD PTR [esp+4], xmm4)\
1441 #define SSE2_FirstMultiply(i) \
1442 AS2( movdqa xmm7, [esi+(i)*16])\
1443 AS2( movdqa xmm5, [edi-(i)*16])\
1444 AS2( pmuludq xmm5, xmm7) \
1445 AS2( movdqa xmm4, [ebx])\
1446 AS2( movdqa xmm6, xmm4) \
1447 AS2( pand xmm4, xmm5) \
1448 AS2( psrld xmm5, 16) \
1449 AS2( pmuludq xmm7, [edx-(i)*16])\
1450 AS2( pand xmm6, xmm7) \
1451 AS2( psrld xmm7, 16)
1453 #define Squ_Begin(n) \
1456 AS2( and esp, 0xfffffff0)\
1457 AS2( lea edi, [esp-32*n])\
1458 AS2( sub esp, 32*n+16)\
1460 AS2( mov esi, edi) \
1461 AS2( xor edx, edx) \
1463 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1464 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1465 AS2( movdqa [edi+2*edx], xmm0) \
1466 AS2( psrlq xmm0, 32) \
1467 AS2( movdqa [edi+2*edx+16], xmm0) \
1468 AS2( movdqa [edi+16*n+2*edx], xmm1) \
1469 AS2( psrlq xmm1, 32) \
1470 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1472 AS2( cmp edx, 8*(n)) \
1474 AS2( lea edx, [edi+16*n])\
1475 SSE2_FirstMultiply(0) \
1477 #define Squ_Acc(i) \
1479 AS2( movdqa xmm1, [esi+(i)*16]) \
1480 AS2( movdqa xmm0, [edi-(i)*16]) \
1481 AS2( movdqa xmm2, [ebx]) \
1482 AS2( pmuludq xmm0, xmm1) \
1483 AS2( pmuludq xmm1, [edx-(i)*16]) \
1484 AS2( movdqa xmm3, xmm2) \
1485 AS2( pand xmm2, xmm0) \
1486 AS2( psrld xmm0, 16) \
1487 AS2( paddd xmm4, xmm2) \
1488 AS2( paddd xmm5, xmm0) \
1489 AS2( pand xmm3, xmm1) \
1490 AS2( psrld xmm1, 16) \
1491 AS2( paddd xmm6, xmm3) \
1492 AS2( paddd xmm7, xmm1) \
1495 #define Squ_Acc2(i) ASC(call, LSqu##i)
// Squ_Acc3 .. Squ_Acc8 are plain aliases of Squ_Acc2, so each one expands to
// the same ASC(call, LSqu##i) form regardless of its numeric suffix.
1496 #define Squ_Acc3(i) Squ_Acc2(i)
1497 #define Squ_Acc4(i) Squ_Acc2(i)
1498 #define Squ_Acc5(i) Squ_Acc2(i)
1499 #define Squ_Acc6(i) Squ_Acc2(i)
1500 #define Squ_Acc7(i) Squ_Acc2(i)
1501 #define Squ_Acc8(i) Squ_Acc2(i)
1503 #define SSE2_End(E, n) \
1504 SSE2_SaveShift(2*(n)-3) \
1505 AS2( movdqa xmm7, [esi+16]) \
1506 AS2( movdqa xmm0, [edi]) \
1507 AS2( pmuludq xmm0, xmm7) \
1508 AS2( movdqa xmm2, [ebx]) \
1509 AS2( pmuludq xmm7, [edx]) \
1510 AS2( movdqa xmm6, xmm2) \
1511 AS2( pand xmm2, xmm0) \
1512 AS2( psrld xmm0, 16) \
1513 AS2( paddd xmm4, xmm2) \
1514 AS2( paddd xmm5, xmm0) \
1515 AS2( pand xmm6, xmm7) \
1516 AS2( psrld xmm7, 16) \
1517 SSE2_SaveShift(2*(n)-2) \
1518 SSE2_FinalSave(2*(n)-1) \
1522 #define Squ_End(n) SSE2_End(SquEpilogue, n)
// Mul_End and Top_End (like Squ_End on the preceding line) forward to
// SSE2_End; they differ only in the first argument they pass along
// (MulEpilogue / TopEpilogue).
1523 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1524 #define Top_End(n) SSE2_End(TopEpilogue, n)
1526 #define Squ_Column1(k, i) \
1527 Squ_SSE2_SaveShift(k) \
1529 SSE2_FirstMultiply(1)\
1531 AS2( paddd xmm4, xmm4) \
1532 AS2( paddd xmm5, xmm5) \
1533 AS2( movdqa xmm3, [esi]) \
1534 AS2( movq xmm1, QWORD PTR [esi+8]) \
1535 AS2( pmuludq xmm1, xmm3) \
1536 AS2( pmuludq xmm3, xmm3) \
1537 AS2( movdqa xmm0, [ebx])\
1538 AS2( movdqa xmm2, xmm0) \
1539 AS2( pand xmm0, xmm1) \
1540 AS2( psrld xmm1, 16) \
1541 AS2( paddd xmm6, xmm0) \
1542 AS2( paddd xmm7, xmm1) \
1543 AS2( pand xmm2, xmm3) \
1544 AS2( psrld xmm3, 16) \
1545 AS2( paddd xmm6, xmm6) \
1546 AS2( paddd xmm7, xmm7) \
1547 AS2( paddd xmm4, xmm2) \
1548 AS2( paddd xmm5, xmm3) \
1549 AS2( movq xmm0, QWORD PTR [esp+4])\
1550 AS2( movq xmm1, QWORD PTR [esp+12])\
1551 AS2( paddd xmm4, xmm0)\
1552 AS2( paddd xmm5, xmm1)\
1554 #define Squ_Column0(k, i) \
1555 Squ_SSE2_SaveShift(k) \
1558 SSE2_FirstMultiply(1)\
1560 AS2( paddd xmm6, xmm6) \
1561 AS2( paddd xmm7, xmm7) \
1562 AS2( paddd xmm4, xmm4) \
1563 AS2( paddd xmm5, xmm5) \
1564 AS2( movq xmm0, QWORD PTR [esp+4])\
1565 AS2( movq xmm1, QWORD PTR [esp+12])\
1566 AS2( paddd xmm4, xmm0)\
1567 AS2( paddd xmm5, xmm1)\
1569 #define SSE2_MulAdd45 \
1570 AS2( movdqa xmm7, [esi]) \
1571 AS2( movdqa xmm0, [edi]) \
1572 AS2( pmuludq xmm0, xmm7) \
1573 AS2( movdqa xmm2, [ebx]) \
1574 AS2( pmuludq xmm7, [edx]) \
1575 AS2( movdqa xmm6, xmm2) \
1576 AS2( pand xmm2, xmm0) \
1577 AS2( psrld xmm0, 16) \
1578 AS2( paddd xmm4, xmm2) \
1579 AS2( paddd xmm5, xmm0) \
1580 AS2( pand xmm6, xmm7) \
1581 AS2( psrld xmm7, 16)
1583 #define Mul_Begin(n) \
1586 AS2( and esp, 0xfffffff0)\
1587 AS2( sub esp, 48*n+16)\
1589 AS2( xor edx, edx) \
1591 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1592 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1593 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1594 AS2( movdqa [esp+20+2*edx], xmm0) \
1595 AS2( psrlq xmm0, 32) \
1596 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1597 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1598 AS2( psrlq xmm1, 32) \
1599 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1600 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1601 AS2( psrlq xmm2, 32) \
1602 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1604 AS2( cmp edx, 8*(n)) \
1606 AS2( lea edi, [esp+20])\
1607 AS2( lea edx, [esp+20+16*n])\
1608 AS2( lea esi, [esp+20+32*n])\
1609 SSE2_FirstMultiply(0) \
1611 #define Mul_Acc(i) \
1613 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1614 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1615 AS2( movdqa xmm2, [ebx]) \
1616 AS2( pmuludq xmm0, xmm1) \
1617 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1618 AS2( movdqa xmm3, xmm2) \
1619 AS2( pand xmm2, xmm0) \
1620 AS2( psrld xmm0, 16) \
1621 AS2( paddd xmm4, xmm2) \
1622 AS2( paddd xmm5, xmm0) \
1623 AS2( pand xmm3, xmm1) \
1624 AS2( psrld xmm1, 16) \
1625 AS2( paddd xmm6, xmm3) \
1626 AS2( paddd xmm7, xmm1) \
1629 #define Mul_Acc2(i) ASC(call, LMul##i)
// Mul_Acc3 .. Mul_Acc16 are plain aliases of Mul_Acc2, so each one expands to
// the same ASC(call, LMul##i) form regardless of its numeric suffix.
1630 #define Mul_Acc3(i) Mul_Acc2(i)
1631 #define Mul_Acc4(i) Mul_Acc2(i)
1632 #define Mul_Acc5(i) Mul_Acc2(i)
1633 #define Mul_Acc6(i) Mul_Acc2(i)
1634 #define Mul_Acc7(i) Mul_Acc2(i)
1635 #define Mul_Acc8(i) Mul_Acc2(i)
1636 #define Mul_Acc9(i) Mul_Acc2(i)
1637 #define Mul_Acc10(i) Mul_Acc2(i)
1638 #define Mul_Acc11(i) Mul_Acc2(i)
1639 #define Mul_Acc12(i) Mul_Acc2(i)
1640 #define Mul_Acc13(i) Mul_Acc2(i)
1641 #define Mul_Acc14(i) Mul_Acc2(i)
1642 #define Mul_Acc15(i) Mul_Acc2(i)
1643 #define Mul_Acc16(i) Mul_Acc2(i)
1645 #define Mul_Column1(k, i) \
1651 #define Mul_Column0(k, i) \
1658 #define Bot_Acc(i) \
1659 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1660 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1661 AS2( pmuludq xmm0, xmm1) \
1662 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1663 AS2( paddq xmm4, xmm0) \
1664 AS2( paddd xmm6, xmm1)
1666 #define Bot_SaveAcc(k) \
1670 AS2( movdqa xmm6, [esi]) \
1671 AS2( movdqa xmm0, [edi]) \
1672 AS2( pmuludq xmm0, xmm6) \
1673 AS2( paddq xmm4, xmm0) \
1674 AS2( psllq xmm5, 16) \
1675 AS2( paddq xmm4, xmm5) \
1676 AS2( pmuludq xmm6, [edx])
1678 #define Bot_End(n) \
1679 AS2( movhlps xmm7, xmm6) \
1680 AS2( paddd xmm6, xmm7) \
1681 AS2( psllq xmm6, 32) \
1682 AS2( paddd xmm4, xmm6) \
1683 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1687 #define Top_Begin(n) \
1690 AS2( and esp, 0xfffffff0)\
1691 AS2( sub esp, 48*n+16)\
1693 AS2( xor edx, edx) \
1695 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1696 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1697 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1698 AS2( movdqa [esp+20+2*edx], xmm0) \
1699 AS2( psrlq xmm0, 32) \
1700 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1701 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1702 AS2( psrlq xmm1, 32) \
1703 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1704 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1705 AS2( psrlq xmm2, 32) \
1706 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1708 AS2( cmp edx, 8*(n)) \
1710 AS2( mov eax, esi) \
1711 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1712 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1713 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1714 AS2( pxor xmm4, xmm4)\
1715 AS2( pxor xmm5, xmm5)
// Top_Acc(i) (SSE2 version): loads the quadword at [esi+off+8] into xmm0,
// multiplies it with pmuludq by the operand at [edx-off], shifts each 64-bit
// lane right by 48 bits, and adds the result into xmm5 with 32-bit lane adds.
// off = i/2*(1-(i-2*(i/2))*2)*16. Assuming integer division, (i-2*(i/2)) is
// i modulo 2, so off is +(i/2)*16 for even i and -(i/2)*16 for odd i.
1717 #define Top_Acc(i) \
1718 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1719 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1720 AS2( psrlq xmm0, 48) \
1721 AS2( paddd xmm5, xmm0)\
1723 #define Top_Column0(i) \
1724 AS2( psllq xmm5, 32) \
1730 #define Top_Column1(i) \
1736 AS2( movd xmm0, eax)\
1737 AS2( movd xmm1, [ecx+4])\
1738 AS2( psrld xmm1, 16)\
1739 AS2( pcmpgtd xmm1, xmm0)\
1740 AS2( psrld xmm1, 31)\
1741 AS2( paddd xmm4, xmm1)\
1743 void SSE2_Square4(word *C,
const word *A)
1750 void SSE2_Square8(word *C, const word *A)
1766 void SSE2_Square16(word *C, const word *A)
1771 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1790 void SSE2_Square32(word *C, const word *A)
1794 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1828 void SSE2_Multiply4(word *C, const word *A, const word *B)
1840 void SSE2_Multiply8(word *C, const word *A, const word *B)
1845 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1856 void SSE2_Multiply16(word *C, const word *A, const word *B)
1861 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1880 void SSE2_Multiply32(word *C, const word *A, const word *B)
1884 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1918 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1921 Bot_SaveAcc(0) Bot_Acc(2)
1925 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1930 Mul_Acc(3) Mul_Acc(2)
1935 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1939 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1944 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1953 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1957 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1962 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1979 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1983 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1986 Top_Acc(3) Top_Acc(2) Top_Acc(1)
1989 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1998 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2001 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2004 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2017 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2020 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2023 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2044 #endif // #if CRYPTOPP_INTEGER_SSE2
// PAdd: pointer to an add/subtract kernel taking (N, C, A, B) with the
// CRYPTOPP_FASTCALL convention and returning int. Both s_pAdd and s_pSub in
// this file have this type.
2048 typedef int (CRYPTOPP_FASTCALL * PAdd)(
size_t N, word *C,
const word *A,
const word *B);
// PMul: pointer to a fixed-size multiply kernel (C, A, B). Used as the element
// type of the s_pMul and s_pBot tables.
2049 typedef void (* PMul)(word *C,
const word *A,
const word *B);
// PSqu: pointer to a fixed-size squaring kernel (C, A). Element type of s_pSqu.
2050 typedef void (* PSqu)(word *C,
const word *A);
// PMulTop: pointer to a fixed-size "multiply top" kernel (C, A, B, L), where L
// is passed by value as a single word. Element type of s_pTop.
2051 typedef void (* PMulTop)(word *C,
const word *A,
const word *B, word L);
2053 #if CRYPTOPP_INTEGER_SSE2
2054 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2055 static size_t s_recursionLimit = 8;
2057 static const size_t s_recursionLimit = 16;
// Kernel dispatch tables. Callers in this file index them with N/4 (for
// example s_pMul[N/4](R, A, B)); SetFunctionPointers() fills selected slots
// among 0, 1, 2, 4 and 8.
2060 static PMul s_pMul[9], s_pBot[9];
2061 static PSqu s_pSqu[9];
2062 static PMulTop s_pTop[9];
// Populates the s_pMul / s_pBot / s_pSqu / s_pTop dispatch tables.
// Slot 0 (and s_pTop[1]) always receive Baseline_* kernels. The remaining
// slots receive either SSE2_* or Baseline_* kernels depending on
// CRYPTOPP_INTEGER_SSE2 and on conditions that are not shown in this excerpt.
// NOTE(review): the braces, the #else/#endif lines and any runtime CPU check
// are missing from this listing -- confirm the exact branching in the full file.
2064 static void SetFunctionPointers()
2066 s_pMul[0] = &Baseline_Multiply2;
2067 s_pBot[0] = &Baseline_MultiplyBottom2;
2068 s_pSqu[0] = &Baseline_Square2;
2069 s_pTop[0] = &Baseline_MultiplyTop2;
2070 s_pTop[1] = &Baseline_MultiplyTop4;
2072 #if CRYPTOPP_INTEGER_SSE2
2075 #if _MSC_VER != 1200 || defined(NDEBUG)
// SSE2 assignments: the recursion limit is raised to 32 and slots 1, 2, 4 and 8
// (2, 4, 8 for s_pTop) are pointed at the SSE2 kernels.
2083 s_recursionLimit = 32;
2085 s_pMul[1] = &SSE2_Multiply4;
2086 s_pMul[2] = &SSE2_Multiply8;
2087 s_pMul[4] = &SSE2_Multiply16;
2088 s_pMul[8] = &SSE2_Multiply32;
2090 s_pBot[1] = &SSE2_MultiplyBottom4;
2091 s_pBot[2] = &SSE2_MultiplyBottom8;
2092 s_pBot[4] = &SSE2_MultiplyBottom16;
2093 s_pBot[8] = &SSE2_MultiplyBottom32;
2095 s_pSqu[1] = &SSE2_Square4;
2096 s_pSqu[2] = &SSE2_Square8;
2097 s_pSqu[4] = &SSE2_Square16;
2098 s_pSqu[8] = &SSE2_Square32;
2100 s_pTop[2] = &SSE2_MultiplyTop8;
2101 s_pTop[4] = &SSE2_MultiplyTop16;
2102 s_pTop[8] = &SSE2_MultiplyTop32;
// Baseline assignments for slots 1 and 2.
2107 s_pMul[1] = &Baseline_Multiply4;
2108 s_pMul[2] = &Baseline_Multiply8;
2110 s_pBot[1] = &Baseline_MultiplyBottom4;
2111 s_pBot[2] = &Baseline_MultiplyBottom8;
2113 s_pSqu[1] = &Baseline_Square4;
2114 s_pSqu[2] = &Baseline_Square8;
2116 s_pTop[2] = &Baseline_MultiplyTop8;
// The 16-word baseline kernels only exist when CRYPTOPP_INTEGER_SSE2 is off,
// so slot 4 is assigned under the same condition.
2118 #if !CRYPTOPP_INTEGER_SSE2
2119 s_pMul[4] = &Baseline_Multiply16;
2120 s_pBot[4] = &Baseline_MultiplyBottom16;
2121 s_pSqu[4] = &Baseline_Square16;
2122 s_pTop[4] = &Baseline_MultiplyTop16;
// Adds the N-word arrays A and B into C. With CRYPTOPP_INTEGER_SSE2 the call
// goes through the s_pAdd function pointer; otherwise it calls Baseline_Add
// directly. Note the argument order change: the kernels take N first.
// Callers in this file treat the int result as the carry out.
2127 inline int Add(word *C,
const word *A,
const word *B,
size_t N)
2129 #if CRYPTOPP_INTEGER_SSE2
2130 return s_pAdd(N, C, A, B);
2132 return Baseline_Add(N, C, A, B);
// Subtracts the N-word array B from A into C. With CRYPTOPP_INTEGER_SSE2 the
// call goes through the s_pSub function pointer; otherwise it calls
// Baseline_Sub directly. Note the argument order change: the kernels take N
// first. Callers in this file treat the int result as the borrow out.
2136 inline int Subtract(word *C,
const word *A,
const word *B,
size_t N)
2138 #if CRYPTOPP_INTEGER_SSE2
2139 return s_pSub(N, C, A, B);
2141 return Baseline_Sub(N, C, A, B);
// Multiplies the N-word arrays A and B into R, using T as scratch space.
// N must be even and at least 2 (asserted). For N <= s_recursionLimit the
// work is handed to the fixed-size kernel s_pMul[N/4]; otherwise the operands
// are split into halves of N2 = N/2 words and three half-size products are
// computed recursively, then combined with Add/Subtract/Increment.
// NOTE(review): A0/A1, B0/B1, R0..R3 and T0..T2 are defined outside this
// excerpt (presumably macros naming the halves/quarters of A, B, R and T), and
// the braces, else-branches and the declaration of c3 are missing from this
// listing -- confirm against the full file.
2168 void RecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2170 assert(N>=2 && N%2==0);
2172 if (N <= s_recursionLimit)
2173 s_pMul[N/4](R, A, B);
2176 const size_t N2 = N/2;
// AN2 is 0 when Compare(A0, A1) > 0 and N2 otherwise; N2 ^ AN2 is then the
// other of the two offsets, so the subtraction operands swap accordingly.
2178 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2179 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
// Same selection for B.
2181 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2182 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
// Three half-size products: A1*B1, the product of the two differences just
// stored in R0 and R1, and A0*B0.
2184 RecursiveMultiply(R2, T2, A1, B1, N2);
2185 RecursiveMultiply(T0, T2, R0, R1, N2);
2186 RecursiveMultiply(R0, T2, A0, B0, N2);
// Recombination; c2 and c3 collect the carries/borrows of the partial sums.
2190 int c2 = Add(R2, R2, R1, N2);
2192 c2 += Add(R1, R2, R0, N2);
2193 c3 += Add(R2, R2, R3, N2);
2196 c3 -= Subtract(R1, R1, T0, N);
2198 c3 += Add(R1, R1, T0, N);
2200 c3 += Increment(R2, N2, c2);
2201 assert (c3 >= 0 && c3 <= 2);
2202 Increment(R3, N2, c3);
// Squares the N-word array A into R, using T as scratch space. N must be
// nonzero and even (asserted). Above s_recursionLimit the visible code squares
// the two halves recursively, multiplies them together once, and adds that
// cross product into R1 twice, pushing the accumulated carry into R3.
// NOTE(review): the small-N branch body, the braces, and the definitions of
// A0/A1, R0..R3, T0/T2 are not part of this excerpt.
2210 void RecursiveSquare(word *R, word *T,
const word *A,
size_t N)
2212 assert(N && N%2==0);
2214 if (N <= s_recursionLimit)
2218 const size_t N2 = N/2;
2220 RecursiveSquare(R0, T2, A0, N2);
2221 RecursiveSquare(R2, T2, A1, N2);
2222 RecursiveMultiply(T0, T2, A0, A1, N2);
// The cross product T0 is added twice rather than being doubled first.
2224 int carry = Add(R1, R1, T0, N);
2225 carry += Add(R1, R1, T0, N);
2226 Increment(R3, N2, carry);
// "Bottom" multiplication of the N-word arrays A and B into R, using T as
// scratch space. N must be even and at least 2 (asserted). For
// N <= s_recursionLimit the fixed-size kernel s_pBot[N/4] is used; otherwise
// one full half-size product (A0*B0) is computed into R and two half-size
// bottom products (A1*B0 and A0*B1) are each added into R1.
// NOTE(review): braces and the definitions of A0/A1, B0/B1, R1, T0/T1 are not
// part of this excerpt.
2235 void RecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2237 assert(N>=2 && N%2==0);
2239 if (N <= s_recursionLimit)
2240 s_pBot[N/4](R, A, B);
2243 const size_t N2 = N/2;
2245 RecursiveMultiply(R, T, A0, B0, N2);
2246 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2247 Add(R1, R1, T0, N2);
2248 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2249 Add(R1, R1, T0, N2);
// "Top" multiplication of the N-word arrays A and B into R, with T as scratch
// space and L as an additional N-word input. N must be even and at least 2
// (asserted). For N <= s_recursionLimit the fixed-size kernel s_pTop[N/4] is
// called and receives only the single word L[N-1]; otherwise two half-size
// products are formed with RecursiveMultiply and combined with L through the
// Add/Subtract/Compare sequence below.
// NOTE(review): the braces, the conditions selecting between the paired
// alternatives below, the declarations of t and c3, and the definitions of
// A0/A1, B0/B1, R0/R1, T0..T2 are not part of this excerpt.
2259 void MultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
size_t N)
2261 assert(N>=2 && N%2==0);
2263 if (N <= s_recursionLimit)
2264 s_pTop[N/4](R, A, B, L[N-1]);
2267 const size_t N2 = N/2;
// Same operand-ordering trick as in RecursiveMultiply: AN2/BN2 are 0 or N2 and
// N2 ^ AN2 (resp. N2 ^ BN2) is the other offset.
2269 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2270 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2272 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2273 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2275 RecursiveMultiply(T0, T2, R0, R1, N2);
2276 RecursiveMultiply(R0, T2, A1, B1, N2);
2281 int c2 = Subtract(T2, L+N2, L, N2);
2285 c2 -= Add(T2, T2, T0, N2);
2286 t = (Compare(T2, R0, N2) == -1);
2287 c3 = t - Subtract(T2, T2, T1, N2);
2291 c2 += Subtract(T2, T2, T0, N2);
2292 t = (Compare(T2, R0, N2) == -1);
2293 c3 = t + Add(T2, T2, T1, N2);
2298 c3 += Increment(T2, N2, c2);
2300 c3 -= Decrement(T2, N2, -c2);
2301 c3 += Add(R0, T2, R1, N2);
2303 assert (c3 >= 0 && c3 <= 2);
2304 Increment(R1, N2, c3);
// Thin wrapper: forwards all arguments unchanged to RecursiveMultiply.
2308 inline void Multiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2310 RecursiveMultiply(R, T, A, B, N);
// Thin wrapper: forwards all arguments unchanged to RecursiveSquare.
2313 inline void Square(word *R, word *T,
const word *A,
size_t N)
2315 RecursiveSquare(R, T, A, N);
// Thin wrapper: forwards all arguments unchanged to RecursiveMultiplyBottom.
2318 inline void MultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2320 RecursiveMultiplyBottom(R, T, A, B, N);
// Multiplies A (NA words) by B (NB words) into R, using T as scratch space.
// The visible fragments show several cases: a direct Multiply of equal-sized
// operands, special handling built on SetWords / CopyWords / LinearMultiply
// with A[0], and a general path (asserting NB % NA == 0) that multiplies A by
// successive NA-word pieces of B. In the general path alternate pieces go to
// R and to T, and T is finally added into R at offset NA with the carry
// propagated into R+NB.
// NOTE(review): the conditions selecting each case, the braces and the
// declaration of i are not part of this excerpt -- confirm in the full file.
2328 void AsymmetricMultiply(word *R, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2335 Multiply(R, T, A, B, NA);
2346 assert(NB % NA == 0);
2353 SetWords(R, 0, NB+2);
2356 CopyWords(R, B, NB);
2357 R[NB] = R[NB+1] = 0;
2360 R[NB] = LinearMultiply(R, B, A[0], NB);
2369 Multiply(R, T, A, B, NA);
2370 CopyWords(T+2*NA, R+NA, NA);
2372 for (i=2*NA; i<NB; i+=2*NA)
2373 Multiply(T+NA+i, T, A, B+i, NA);
2374 for (i=NA; i<NB; i+=2*NA)
2375 Multiply(R+i, T, A, B+i, NA);
2379 for (i=0; i<NB; i+=2*NA)
2380 Multiply(R+i, T, A, B+i, NA);
2381 for (i=NA; i<NB; i+=2*NA)
2382 Multiply(T+NA+i, T, A, B+i, NA);
// Fold the products parked in T into R; a carry out bumps the top NA words.
2385 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2386 Increment(R+NB, NA);
// Computes into R an inverse of the N-word value A modulo a power of two,
// using T as scratch space (purpose taken from the function name and the
// helpers it calls).
// Small case (visible fragment): T[0] is seeded from AtomicInverseModPower2 of
// A[0], then two 2-word bottom multiplications are applied: T+2 = T*A is
// passed through TwosComplement and incremented by 2, and R = T * (T+2).
// Larger case: the lower half is inverted recursively, then the upper half is
// derived with MultiplyTop, MultiplyBottom, Add and TwosComplement.
// NOTE(review): the branch condition, braces, the missing statement between
// the first two visible lines, and the definitions of A0/A1, R0/R1, T0/T1 are
// not part of this excerpt.
2393 void RecursiveInverseModPower2(word *R, word *T,
const word *A,
size_t N)
2397 T[0] = AtomicInverseModPower2(A[0]);
2399 s_pBot[0](T+2, T, A);
2400 TwosComplement(T+2, 2);
2401 Increment(T+2, 2, 2);
2402 s_pBot[0](R, T, T+2);
2406 const size_t N2 = N/2;
2407 RecursiveInverseModPower2(R0, T0, A0, N2);
2409 SetWords(T0+1, 0, N2-1);
2410 MultiplyTop(R1, T1, T0, R0, A0, N2);
2411 MultiplyBottom(T0, T1, R0, A1, N2);
2412 Add(T0, R1, T0, N2);
2413 TwosComplement(T0, N2);
2414 MultiplyBottom(R1, T1, R0, T0, N2);
// Montgomery reduction of X with modulus M (N words) and precomputed value U,
// writing the N-word result to R and using T as scratch space.
// The listing appears to contain three alternative bodies, each ending by
// writing R: (1) a MultiplyBottom / MultiplyTop formulation, (2) a word-by-word
// loop built on MultiplyWords, and (3) the same loop written with __m64
// intrinsics.
// NOTE(review): the preprocessor lines that choose between these bodies, the
// braces and several declarations (p, c) are missing from this excerpt --
// confirm which variant is actually compiled.
2424 void MontgomeryReduce(word *R, word *T, word *X,
const word *M,
const word *U,
size_t N)
2427 MultiplyBottom(R, T, X, U, N);
2428 MultiplyTop(T, T+N, X, R, M, N);
2429 word borrow = Subtract(T, X+N, T, N);
2431 word carry = Add(T+N, T, M, N);
2432 assert(carry | !borrow);
2433 CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
// Branch-free select: (0-borrow) & N is N when borrow is 1 and 0 when borrow
// is 0, so the copy reads from T+N (the corrected value) or from T.
2434 CopyWords(R, T + ((0-borrow) & N), N);
// Word-by-word variant: u is the negation of U[0]; each outer step multiplies
// it by X[i] and adds t*M into X starting at position i, two words per inner
// iteration.
2436 const word u = 0-U[0];
2438 for (
size_t i=0; i<N; i++)
2440 const word t = u * X[i];
2442 for (
size_t j=0; j<N; j+=2)
2444 MultiplyWords(p, t, M[j]);
2445 Acc2WordsBy1(p, X[i+j]);
2447 X[i+j] = LowWord(p);
2449 MultiplyWords(p, t, M[j+1]);
2450 Acc2WordsBy1(p, X[i+j+1]);
2452 X[i+j+1] = LowWord(p);
// If propagating the carry overflows the upper half, subtract M until a
// borrow occurs.
2456 if (Increment(X+N+i, N-i, c))
2457 while (!Subtract(X+N, X+N, M, N)) {}
2460 memcpy(R, X+N, N*WORD_SIZE);
// __m64 variant of the same loop: products come from _mm_mul_su32 and the
// running carry c is kept in a 64-bit value shifted right by 32 after each
// stored word.
2462 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2463 for (
size_t i=0; i<N; i++)
2465 __m64 t = _mm_cvtsi32_si64(X[i]);
2466 t = _mm_mul_su32(t, u);
2467 __m64 c = _mm_setzero_si64();
2468 for (
size_t j=0; j<N; j+=2)
2470 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2471 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2472 c = _mm_add_si64(c, p);
2473 X[i+j] = _mm_cvtsi64_si32(c);
2474 c = _mm_srli_si64(c, 32);
2475 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2476 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2477 c = _mm_add_si64(c, p);
2478 X[i+j+1] = _mm_cvtsi64_si32(c);
2479 c = _mm_srli_si64(c, 32);
2482 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2483 while (!Subtract(X+N, X+N, M, N)) {}
2486 memcpy(R, X+N, N*WORD_SIZE);
2498 void HalfMontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
const word *V,
size_t N)
2500 assert(N%2==0 && N>=4);
2512 const size_t N2 = N/2;
2513 Multiply(T0, T2, V0, X3, N2);
2514 int c2 = Add(T0, T0, X0, N);
2515 MultiplyBottom(T3, T2, T0, U, N2);
2516 MultiplyTop(T2, R, T0, T3, M0, N2);
2517 c2 -= Subtract(T2, T1, T2, N2);
2518 Multiply(T0, R, T3, M1, N2);
2519 c2 -= Subtract(T0, T2, T0, N2);
2520 int c3 = -(int)Subtract(T1, X2, T1, N2);
2521 Multiply(R0, T2, V1, X3, N2);
2522 c3 += Add(R, R, T, N);
2525 c3 += Increment(R1, N2);
2527 c3 -= Decrement(R1, N2, -c2);
2529 assert(c3>=-1 && c3<=1);
2531 Subtract(R, R, M, N);
2625 static inline void AtomicDivide(word *Q,
const word *A,
const word *B)
2628 DWord q = DivideFourWordsByTwo<word, DWord>(T,
DWord(A[0], A[1]),
DWord(A[2], A[3]),
DWord(B[0], B[1]));
2629 Q[0] = q.GetLowHalf();
2630 Q[1] = q.GetHighHalf();
2636 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2640 assert(memcmp(P, A, 4*WORD_SIZE)==0);
// Corrects a 2-word quotient estimate Q against the N-word divisor B.
// It multiplies Q by B into T, subtracts that product from the (N+2)-word
// remainder R (asserting no borrow and a zero top word), and then, while R is
// still at least B, subtracts B from R and increments Q by one.
// N must be nonzero and even (asserted).
// NOTE(review): braces are missing from this excerpt.
2646 static void CorrectQuotientEstimate(word *R, word *T, word *Q,
const word *B,
size_t N)
2648 assert(N && N%2==0);
2650 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2652 word borrow = Subtract(R, R, T, N+2);
2653 assert(!borrow && !R[N+1]);
2654 CRYPTOPP_UNUSED(borrow);
2656 while (R[N] || Compare(R, B, N) >= 0)
2658 R[N] -= Subtract(R, R, B, N);
// Two-word increment of Q: Q[1] gains 1 exactly when Q[0] wraps to 0.
2659 Q[1] += (++Q[0]==0);
2660 assert(Q[0] || Q[1]);
// Divides A (NA words) by B (NB words), producing quotient Q and remainder R,
// with T as scratch space. NA and NB must be nonzero and even, and at least
// one of the top two words of B must be nonzero (asserted).
// Visible steps: B is copied into TB and shifted left so that its top word has
// its highest bit set; A is copied into TA with the same shift; the top of the
// quotient is handled specially; then quotient words are produced two at a
// time by AtomicDivide followed by CorrectQuotientEstimate; finally the
// remainder is copied out of TA and shifted back right.
// NOTE(review): the braces, the else-branches, and the declarations of TA and
// BT are missing from this excerpt -- confirm in the full file.
2670 void Divide(word *R, word *Q, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2672 assert(NA && NB && NA%2==0 && NB%2==0);
2673 assert(B[NB-1] || B[NB-2]);
2678 word *
const TB=T+NA+2;
2679 word *
const TP=T+NA+2+NB;
// shiftWords is 1 when the top word of B is zero, 0 otherwise.
2682 unsigned shiftWords = (B[NB-1]==0);
2683 TB[0] = TB[NB-1] = 0;
2684 CopyWords(TB+shiftWords, B, NB-shiftWords);
2685 unsigned shiftBits = WORD_BITS -
BitPrecision(TB[NB-1]);
2686 assert(shiftBits < WORD_BITS);
2687 ShiftWordsLeftByBits(TB, NB, shiftBits);
// Apply the same word and bit shift to the dividend copy.
2690 TA[0] = TA[NA] = TA[NA+1] = 0;
2691 CopyWords(TA+shiftWords, A, NA);
2692 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2694 if (TA[NA+1]==0 && TA[NA] <= 1)
2696 Q[NA-NB+1] = Q[NA-NB] = 0;
2697 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2699 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2706 assert(Compare(TA+NA-NB, TB, NB) < 0);
// BT is the top two words of TB plus one, carried across the word boundary.
2710 BT[0] = TB[NB-2] + 1;
2711 BT[1] = TB[NB-1] + (BT[0]==0);
2714 for (
size_t i=NA-2; i>=NB; i-=2)
2716 AtomicDivide(Q+i-NB, TA+i-2, BT);
2717 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
// Undo the shifts to recover the remainder.
2721 CopyWords(R, TA+shiftWords, NB);
2722 ShiftWordsRightByBits(R, NB, shiftBits);
// Helper over the N-word array X. The visible loop condition holds while N is
// nonzero and the two highest words X[N-2] and X[N-1] are both zero.
// NOTE(review): the loop body and the return statement are not part of this
// excerpt; presumably N is reduced by 2 per iteration and returned -- confirm.
2725 static inline size_t EvenWordCount(
const word *X,
size_t N)
2727 while (N && X[N-2]==0 && X[N-1]==0)
2738 unsigned int AlmostInverse(word *R, word *T,
const word *A,
size_t NA,
const word *M,
size_t N)
2740 assert(NA<=N && N && N%2==0);
2746 size_t bcLen=2, fgLen=EvenWordCount(M, N);
2750 SetWords(T, 0, 3*N);
2752 CopyWords(f, A, NA);
2760 if (EvenWordCount(f, fgLen)==0)
2766 ShiftWordsRightByWords(f, fgLen, 1);
2767 bcLen += 2 * (c[bcLen-1] != 0);
2769 ShiftWordsLeftByWords(c, bcLen, 1);
2779 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2782 Subtract(R, M, b, N);
2788 ShiftWordsRightByBits(f, fgLen, i);
2789 t = ShiftWordsLeftByBits(c, bcLen, i);
2791 bcLen += 2 * (t!=0);
2794 bool swap = Compare(f, g, fgLen)==-1;
2799 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2801 Subtract(f, f, g, fgLen);
2802 t = Add(b, b, c, bcLen);
// Works on the N-word value R with modulus M; the name and the parameter k
// suggest R = A / 2^k mod M. Two halving steps are visible: a plain right
// shift of R by one bit, and an alternative that first adds M to R, shifts
// right by one bit, and re-inserts the carry of that addition as the top bit
// of R[N-1].
// NOTE(review): the initial copy of A, the loop over k, the condition choosing
// between the two steps and the braces are missing from this excerpt.
2813 void DivideByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2820 ShiftWordsRightByBits(R, N, 1);
2823 word carry = Add(R, R, M, N);
2824 ShiftWordsRightByBits(R, N, 1);
2825 R[N-1] += carry<<(WORD_BITS-1);
// Works on the N-word value R with modulus M; the name and the parameter k
// suggest R = A * 2^k mod M. The visible step shifts R left by one bit and
// subtracts M when the shift produced a carry out or when R compares greater
// than or equal to M.
// NOTE(review): the initial copy of A, the loop over k and the braces are
// missing from this excerpt.
2834 void MultiplyByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2839 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2840 Subtract(R, R, M, N);
// One-time setup guarded by g_pAssignIntToInteger: when it is still unset, the
// kernel dispatch tables are filled via SetFunctionPointers() and
// g_pAssignIntToInteger is pointed at AssignIntToInteger, so later
// constructions skip this work.
// NOTE(review): braces are missing from this excerpt.
2845 InitializeInteger::InitializeInteger()
2847 if (!g_pAssignIntToInteger)
2849 SetFunctionPointers();
2850 g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
// Lookup table for word counts 0..8: index n gives 2 for n <= 2, 4 for
// n = 3..4 and 8 for n = 5..8.
2854 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
// Rounds a word count n up to an allocation size. The visible return handles
// the table-driven case via RoundupSizeTable[n].
// NOTE(review): the table has 9 entries, so this return is only valid for
// n <= 8; the guarding condition and the handling of larger n are on lines
// missing from this excerpt -- confirm in the full file.
2856 static inline size_t RoundupSize(
size_t n)
2859 return RoundupSizeTable[n];
2870 : reg(2), sign(POSITIVE)
2872 reg[0] = reg[1] = 0;
2876 : reg(RoundupSize(t.WordCount())), sign(t.sign)
2878 CopyWords(reg, t.reg, reg.
size());
2884 reg[0] = word(value);
2898 reg[0] = word(value);
2914 unsigned long value = (
unsigned long)reg[0];
2918 return (
signed long)value >= 0;
2920 return -(
signed long)value < 0;
2927 unsigned long value = (
unsigned long)reg[0];
2929 return sign==
POSITIVE ? value : -(
signed long)value;
2934 Decode(encodedInteger, byteCount, s);
2944 encodedInteger.
Get(block, block.size());
2945 std::reverse(block.begin(), block.begin()+block.size());
2947 Decode(block.begin(), block.size(), s);
2951 Decode(encodedInteger, byteCount, s);
2956 Decode(encodedInteger, byteCount, s);
2966 #if (CRYPTOPP_MSC_VERSION >= 1400)
2967 std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2968 stdext::make_checked_array_iterator(block.begin(), block.size()));
2970 std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2972 Decode(block.begin(), block.size(), s);
2976 Decode(encodedInteger, byteCount, s);
2991 if (!
Randomize(rng, min, max, rnType, equiv, mod))
3026 bool Integer::operator!()
const
3035 if (reg.
size() != t.reg.
size() || t.reg[t.reg.
size()/2] == 0)
3037 CopyWords(reg, t.reg, reg.
size());
3045 if (n/WORD_BITS >= reg.
size())
3048 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3056 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3060 if (n/WORD_BITS < reg.
size())
3061 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3067 if (n/WORD_SIZE >= reg.
size())
3070 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3076 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3077 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3083 assert(n <=
sizeof(v)*8);
3084 for (
unsigned int j=0; j<n; j++)
3085 v |= lword(
GetBit(i+j)) << j;
3089 Integer Integer::operator-()
const
3096 Integer Integer::AbsoluteValue()
const
3106 std::swap(sign, a.sign);
3110 : reg(RoundupSize(length)), sign(POSITIVE)
3113 SetWords(reg+1, 0, reg.
size()-1);
3121 int radix, sign = 1;
3124 unsigned int length;
3125 for (length = 0; str[length] != 0; length++) {}
3132 switch (str[length-1])
3154 str += 1, length -= 1;
3157 if (length > 2 && str[0] ==
'0' && (str[1] ==
'x' || str[1] ==
'X'))
3160 str += 2, length -= 2;
3165 for (
unsigned int i=0; i<length; i++)
3167 int digit, ch =
static_cast<int>(str[i]);
3169 if (ch >=
'0' && ch <=
'9')
3171 else if (ch >=
'A' && ch <=
'F')
3172 digit = ch -
'A' + 10;
3173 else if (ch >=
'a' && ch <=
'f')
3174 digit = ch -
'a' + 10;
3188 unsigned int nh = 0, nl = 0, nc = 0;
3191 for (
unsigned int i=0; i<length; i++)
3193 int digit, ch =
static_cast<int>(str[i]);
3195 if (ch >=
'0' && ch <=
'9')
3197 else if (ch >=
'A' && ch <=
'F')
3198 digit = ch -
'A' + 10;
3199 else if (ch >=
'a' && ch <=
'f')
3200 digit = ch -
'a' + 10;
3213 v += position * (nh << 4 | nl);
3214 nc = 0, position <<= 8;
3224 for (
int i=
static_cast<int>(length)-1; i>=0; i--)
3226 int digit, ch =
static_cast<int>(str[i]);
3228 if (ch >=
'0' && ch <=
'9')
3230 else if (ch >=
'A' && ch <=
'F')
3231 digit = ch -
'A' + 10;
3232 else if (ch >=
'a' && ch <=
'f')
3233 digit = ch -
'a' + 10;
3252 : reg(2), sign(POSITIVE)
3254 *
this = StringToInteger(str);
3258 : reg(2), sign(POSITIVE)
3260 *
this = StringToInteger(str,order);
3264 : reg(2), sign(POSITIVE)
3266 *
this = StringToInteger(str);
3270 : reg(2), sign(POSITIVE)
3272 *
this = StringToInteger(str,order);
3277 return (
unsigned int)CountWords(reg, reg.
size());
3284 return (wordCount-1)*WORD_SIZE +
BytePrecision(reg[wordCount-1]);
3293 return (wordCount-1)*WORD_BITS +
BitPrecision(reg[wordCount-1]);
3301 Decode(store, inputLen, s);
3312 while (inputLen>0 && (sign==
POSITIVE ? b==0 : b==0xff))
3320 const size_t size = RoundupSize(
BytesToWords(inputLen));
3324 for (
size_t i=inputLen; i > 0; i--)
3327 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3332 for (
size_t i=inputLen; i<reg.
size()*WORD_SIZE; i++)
3333 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3334 TwosComplement(reg, reg.
size());
3352 assert(output && outputLen);
3354 Encode(sink, outputLen, signedness);
3361 for (
size_t i=outputLen; i > 0; i--)
3388 if (!dec.IsDefiniteLength() || dec.
MaxRetrievable() < dec.RemainingLength())
3404 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3418 word16 bitCount = word16(
BitCount());
3422 return 2 + byteCount;
3441 const size_t nbytes = nbits/8 + 1;
3445 buf[0] = (byte)
Crop(buf[0], nbits % 8);
3455 const unsigned int nbits = range.
BitCount();
3461 while (*
this > range);
3468 return GenerateRandomNoThrow(rng,
MakeParameters(
"Min", min)(
"Max", max)(
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
3474 KDF2_RNG(
const byte *seed,
size_t seedSize)
3475 : m_counter(0), m_counterAndSeed(seedSize + 4)
3477 memcpy(m_counterAndSeed + 4, seed, seedSize);
3511 throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
3530 bq.
Get(finalSeed, finalSeed.size());
3531 kdf2Rng.reset(
new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3542 Integer min1 = min + (equiv-min)%mod;
3563 if (
FirstPrime(first, max, equiv, mod, pSelector))
3567 if (!
FirstPrime(first, max, equiv, mod, pSelector))
3575 if (
FirstPrime(*
this,
STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3585 std::istream& operator>>(std::istream& in,
Integer &a)
3588 unsigned int length = 0;
3597 if (length >= str.
size())
3598 str.
Grow(length + 16);
3600 while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<=
'9') || (c>=
'a' && c<=
'f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
3604 str[length-1] =
'\0';
3610 std::ostream& operator<<(std::ostream& out,
const Integer &a)
3613 const long f = out.flags() & std::ios::basefield;
3618 case std::ios::oct :
3623 case std::ios::hex :
3645 static const char upper[]=
"0123456789ABCDEF";
3646 static const char lower[]=
"0123456789abcdef";
3648 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3667 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3668 if(out.flags() & std::ios_base::showbase)
3673 return out << suffix;
3677 Integer& Integer::operator++()
3681 if (Increment(reg, reg.
size()))
3684 reg[reg.
size()/2]=1;
3689 word borrow = Decrement(reg, reg.
size());
3691 CRYPTOPP_UNUSED(borrow);
3699 Integer& Integer::operator--()
3703 if (Increment(reg, reg.
size()))
3706 reg[reg.
size()/2]=1;
3711 if (Decrement(reg, reg.
size()))
3720 if (a.reg.size() == b.reg.size())
3721 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3722 else if (a.reg.size() > b.reg.size())
3724 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3725 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3726 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3730 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3731 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3732 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3738 sum.reg[sum.reg.
size()/2] = 1;
3745 unsigned aSize = a.WordCount();
3747 unsigned bSize = b.WordCount();
3752 if (Compare(a.reg, b.reg, aSize) >= 0)
3754 Subtract(diff.reg, a.reg, b.reg, aSize);
3759 Subtract(diff.reg, b.reg, a.reg, aSize);
3763 else if (aSize > bSize)
3765 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3766 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3767 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3773 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3774 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3775 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
// Returns a reference to the larger of the two arguments, as ordered by
// operator<. When neither argument compares less than the other (they are
// equivalent), the first argument, a, is returned.
//
// The result is a reference to one of the arguments, not a copy, so it must
// not be kept beyond the lifetime of a temporary passed in.
template <class T> inline const T& STDMAX2(const T& a, const T& b)
{
	return a < b ? b : a;
}
3789 Integer sum((word)0, STDMAX2(reg.
size(), b.reg.size()));
3792 if (b.NotNegative())
3793 PositiveAdd(sum, *
this, b);
3795 PositiveSubtract(sum, *
this, b);
3799 if (b.NotNegative())
3800 PositiveSubtract(sum, b, *
this);
3803 PositiveAdd(sum, *
this, b);
3816 PositiveAdd(*
this, *
this, t);
3818 PositiveSubtract(*
this, *
this, t);
3823 PositiveSubtract(*
this, t, *
this);
3826 PositiveAdd(*
this, *
this, t);
3835 Integer diff((word)0, STDMAX2(reg.
size(), b.reg.size()));
3838 if (b.NotNegative())
3839 PositiveSubtract(diff, *
this, b);
3841 PositiveAdd(diff, *
this, b);
3845 if (b.NotNegative())
3847 PositiveAdd(diff, *
this, b);
3851 PositiveSubtract(diff, b, *
this);
3862 PositiveSubtract(*
this, *
this, t);
3864 PositiveAdd(*
this, *
this, t);
3870 PositiveAdd(*
this, *
this, t);
3874 PositiveSubtract(*
this, t, *
this);
3879 Integer& Integer::operator<<=(
size_t n)
3882 const size_t shiftWords = n / WORD_BITS;
3883 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
3886 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3887 ShiftWordsLeftByBits(reg+shiftWords, wordCount+
BitsToWords(shiftBits), shiftBits);
3891 Integer& Integer::operator>>=(
size_t n)
3894 const size_t shiftWords = n / WORD_BITS;
3895 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
3897 ShiftWordsRightByWords(reg, wordCount, shiftWords);
3898 if (wordCount > shiftWords)
3899 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3907 size_t aSize = RoundupSize(a.WordCount());
3908 size_t bSize = RoundupSize(b.WordCount());
3910 product.reg.
CleanNew(RoundupSize(aSize+bSize));
3914 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3919 PositiveMultiply(product, a, b);
3921 if (a.NotNegative() != b.NotNegative())
3928 Multiply(product, *
this, b);
3957 unsigned aSize = a.WordCount();
3958 unsigned bSize = b.WordCount();
3974 remainder.reg.
CleanNew(RoundupSize(bSize));
3976 quotient.reg.
CleanNew(RoundupSize(aSize-bSize+2));
3980 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3985 PositiveDivide(remainder, quotient, dividend, divisor);
3993 remainder = divisor.AbsoluteValue() - remainder;
4007 if (wordCount <= a.WordCount())
4009 r.reg.
resize(RoundupSize(wordCount));
4010 CopyWords(r.reg, a.reg, wordCount);
4011 SetWords(r.reg+wordCount, 0, r.reg.
size()-wordCount);
4012 if (n % WORD_BITS != 0)
4013 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4017 r.reg.
resize(RoundupSize(a.WordCount()));
4018 CopyWords(r.reg, a.reg, r.reg.
size());
4022 if (a.IsNegative() && r.
NotZero())
4050 if ((divisor & (divisor-1)) == 0)
4053 remainder = dividend.reg[0] & (divisor-1);
4058 quotient.reg.
CleanNew(RoundupSize(i));
4062 quotient.reg[i] =
DWord(dividend.reg[i], remainder) / divisor;
4063 remainder =
DWord(dividend.reg[i], remainder) % divisor;
4074 remainder = divisor - remainder;
4079 Integer Integer::DividedBy(word b)
const
4096 if ((divisor & (divisor-1)) == 0)
4097 remainder = reg[0] & (divisor-1);
4107 remainder = sum % divisor;
4113 remainder =
DWord(reg[i], remainder) % divisor;
4118 remainder = divisor - remainder;
4126 sign =
Sign(1-sign);
4129 int Integer::PositiveCompare(
const Integer& t)
const
4134 return CryptoPP::Compare(reg, t.reg, size);
4136 return size > tSize ? 1 : -1;
4144 return PositiveCompare(t);
4153 return -PositiveCompare(t);
4164 assert(y*y >= *
this);
4169 y = (x + *
this/x) >> 1;
4183 return (
WordCount() == 1) && (reg[0] == 1);
4199 return mr.Exponentiate(x, e);
4222 return !u ?
Zero() : (m*(*
this-u)+1)/(*this);
4227 unsigned k = AlmostInverse(r.reg, T, reg, reg.
size(), m.reg, m.reg.
size());
4228 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.
size());
4234 word g0 = mod, g1 = *
this % mod;
4235 word v0 = 0, v1 = 1;
4263 if (oid != ASN1::prime_field())
4273 ASN1::prime_field().DEREncode(seq);
4290 if (a.reg.size()==m_modulus.reg.
size())
4292 CryptoPP::DivideByPower2Mod(m_result.reg.
begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4296 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4301 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4303 if (CryptoPP::Add(m_result.reg.
begin(), a.reg, b.reg, a.reg.size())
4304 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4306 CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4313 if (m_result1 >= m_modulus)
4314 m_result1 -= m_modulus;
4321 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4323 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4324 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4326 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4341 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4343 if (CryptoPP::Subtract(m_result.reg.
begin(), a.reg, b.reg, a.reg.size()))
4344 CryptoPP::Add(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4351 m_result1 += m_modulus;
4358 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4360 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4361 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4378 CopyWords(m_result.reg.
begin(), m_modulus.reg, m_modulus.reg.
size());
4379 if (CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, a.reg, a.reg.size()))
4380 Decrement(m_result.reg.
begin()+a.reg.size(), m_modulus.reg.
size()-a.reg.size());
4387 if (m_modulus.
IsOdd())
4398 if (m_modulus.
IsOdd())
4402 for (
unsigned int i=0; i<exponentsCount; i++)
4411 m_u((word)0, m_modulus.reg.size()),
4412 m_workspace(5*m_modulus.reg.size())
4414 if (!m_modulus.IsOdd())
4415 throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4417 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.
size());
4422 word *
const T = m_workspace.
begin();
4423 word *
const R = m_result.reg.begin();
4424 const size_t N = m_modulus.reg.size();
4425 assert(a.reg.size()<=N && b.reg.size()<=N);
4427 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4428 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4429 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4435 word *
const T = m_workspace.
begin();
4436 word *
const R = m_result.reg.begin();
4437 const size_t N = m_modulus.reg.size();
4438 assert(a.reg.size()<=N);
4440 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4441 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4442 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4448 word *
const T = m_workspace.
begin();
4449 word *
const R = m_result.reg.begin();
4450 const size_t N = m_modulus.reg.size();
4451 assert(a.reg.size()<=N);
4453 CopyWords(T, a.reg, a.reg.size());
4454 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4455 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4462 word *
const T = m_workspace.
begin();
4463 word *
const R = m_result.reg.begin();
4464 const size_t N = m_modulus.reg.size();
4465 assert(a.reg.size()<=N);
4467 CopyWords(T, a.reg, a.reg.size());
4468 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4469 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4470 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4475 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4477 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4484 template <> CRYPTOPP_DLL
4488 static const unsigned int BIT_32 = (1U << 31);
4489 const bool UPPER = !!(base & BIT_32);
4490 static const unsigned int BIT_31 = (1U << 30);
4491 const bool BASE = !!(base & BIT_31);
4493 const char CH = UPPER ?
'A' :
'a';
4494 base &= ~(BIT_32|BIT_31);
4495 assert(base >= 2 && base <= 32);
4500 bool negative =
false, zero =
false;
4518 s[i++]=char((digit < 10 ?
'0' : (CH - 10)) + digit);
4523 result.reserve(i+2);
4538 else if (base == 16)
4550 template <> CRYPTOPP_DLL
4554 static const unsigned int HIGH_BIT = (1U << 31);
4555 const char CH = !!(base & HIGH_BIT) ?
'A' :
'a';
4565 word64 digit = value % base;
4566 result = char((digit < 10 ?
'0' : (CH - 10)) + digit) + result;