Crypto++  5.6.4
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48 #if (__SUNPRO_CC >= 0x5130)
49 # define MAYBE_CONST
50 # define MAYBE_UNCONST_CAST const_cast<word*>
51 #else
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
54 #endif
55 
56 // "Inline assembly operands don't work with .intel_syntax",
57 // http://llvm.org/bugs/show_bug.cgi?id=24232
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66 #else
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68 #endif
69 
70 NAMESPACE_BEGIN(CryptoPP)
71 
72 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
73 {
74  if (valueType != typeid(Integer))
75  return false;
76  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
77  return true;
78 }
79 
80 inline static int Compare(const word *A, const word *B, size_t N)
81 {
82  while (N--)
83  if (A[N] > B[N])
84  return 1;
85  else if (A[N] < B[N])
86  return -1;
87 
88  return 0;
89 }
90 
91 inline static int Increment(word *A, size_t N, word B=1)
92 {
93  assert(N);
94  word t = A[0];
95  A[0] = t+B;
96  if (A[0] >= t)
97  return 0;
98  for (unsigned i=1; i<N; i++)
99  if (++A[i])
100  return 0;
101  return 1;
102 }
103 
104 inline static int Decrement(word *A, size_t N, word B=1)
105 {
106  assert(N);
107  word t = A[0];
108  A[0] = t-B;
109  if (A[0] <= t)
110  return 0;
111  for (unsigned i=1; i<N; i++)
112  if (A[i]--)
113  return 0;
114  return 1;
115 }
116 
117 static void TwosComplement(word *A, size_t N)
118 {
119  Decrement(A, N);
120  for (unsigned i=0; i<N; i++)
121  A[i] = ~A[i];
122 }
123 
124 static word AtomicInverseModPower2(word A)
125 {
126  assert(A%2==1);
127 
128  word R=A%8;
129 
130  for (unsigned i=3; i<WORD_BITS; i*=2)
131  R = R*(2-R*A);
132 
133  assert(R*A==1);
134  return R;
135 }
136 
137 // ********************************************************
138 
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
140  #define Declare2Words(x) word x##0, x##1;
141  #define AssignWord(a, b) a##0 = b; a##1 = 0;
142  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
143  #define LowWord(a) a##0
144  #define HighWord(a) a##1
145  #ifdef _MSC_VER
146  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147  #ifndef __INTEL_COMPILER
148  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
149  #endif
150  #elif defined(__DECCXX)
151  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152  #elif defined(__x86_64__)
153  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
154  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
155  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
156  #else
157  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
163  #endif
164  #endif
165  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
166  #ifndef Double3Words
167  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
168  #endif
169  #ifndef Acc2WordsBy2
170  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
171  #endif
172  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
173  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174  #define GetCarry(u) u##1
175  #define GetBorrow(u) u##1
176 #else
177  #define Declare2Words(x) dword x;
178  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179  #define MultiplyWords(p, a, b) p = __emulu(a, b);
180  #else
181  #define MultiplyWords(p, a, b) p = (dword)a*b;
182  #endif
183  #define AssignWord(a, b) a = b;
184  #define Add2WordsBy1(a, b, c) a = b + c;
185  #define Acc2WordsBy2(a, b) a += b;
186  #define LowWord(a) word(a)
187  #define HighWord(a) word(a>>WORD_BITS)
188  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
189  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
190  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191  #define GetCarry(u) HighWord(u)
192  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
193 #endif
194 #ifndef MulAcc
195  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
196 #endif
197 #ifndef Acc2WordsBy1
198  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
199 #endif
200 #ifndef Acc3WordsBy2
201  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
202 #endif
203 
204 class DWord
205 {
206 public:
207  // Converity finding on default ctor. We've isntrumented the code,
208  // and cannot uncover a case where it affects a result.
209 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
210  // Repeating pattern of 1010 for debug builds to break things...
211  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
212 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
213  // Repeating pattern of 1010 for debug builds to break things...
214  DWord() : m_halfs() {memset(&m_halfs, 0xaa, sizeof(m_halfs));}
215 #else
216  DWord() {}
217 #endif
218 
219 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
220  explicit DWord(word low) : m_whole(low) {}
221 #else
222  explicit DWord(word low)
223  {
224  m_halfs.low = low;
225  m_halfs.high = 0;
226  }
227 #endif
228 
229  DWord(word low, word high)
230  {
231  m_halfs.low = low;
232  m_halfs.high = high;
233  }
234 
235  static DWord Multiply(word a, word b)
236  {
237  DWord r;
238  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
239  r.m_whole = (dword)a * b;
240  #elif defined(MultiplyWordsLoHi)
241  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
242  #else
243  assert(0);
244  #endif
245  return r;
246  }
247 
248  static DWord MultiplyAndAdd(word a, word b, word c)
249  {
250  DWord r = Multiply(a, b);
251  return r += c;
252  }
253 
254  DWord & operator+=(word a)
255  {
256  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
257  m_whole = m_whole + a;
258  #else
259  m_halfs.low += a;
260  m_halfs.high += (m_halfs.low < a);
261  #endif
262  return *this;
263  }
264 
265  DWord operator+(word a)
266  {
267  DWord r;
268  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
269  r.m_whole = m_whole + a;
270  #else
271  r.m_halfs.low = m_halfs.low + a;
272  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
273  #endif
274  return r;
275  }
276 
277  DWord operator-(DWord a)
278  {
279  DWord r;
280  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
281  r.m_whole = m_whole - a.m_whole;
282  #else
283  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
284  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
285  #endif
286  return r;
287  }
288 
289  DWord operator-(word a)
290  {
291  DWord r;
292  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
293  r.m_whole = m_whole - a;
294  #else
295  r.m_halfs.low = m_halfs.low - a;
296  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
297  #endif
298  return r;
299  }
300 
301  // returns quotient, which must fit in a word
302  word operator/(word divisor);
303 
304  word operator%(word a);
305 
306  bool operator!() const
307  {
308  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
309  return !m_whole;
310  #else
311  return !m_halfs.high && !m_halfs.low;
312  #endif
313  }
314 
315  word GetLowHalf() const {return m_halfs.low;}
316  word GetHighHalf() const {return m_halfs.high;}
317  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
318 
319 private:
320  union
321  {
322  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
323  dword m_whole;
324  #endif
325  struct
326  {
327  #ifdef IS_LITTLE_ENDIAN
328  word low;
329  word high;
330  #else
331  word high;
332  word low;
333  #endif
334  } m_halfs;
335  };
336 };
337 
338 class Word
339 {
340 public:
341  // Converity finding on default ctor. We've isntrumented the code,
342  // and cannot uncover a case where it affects a result.
343 #if defined(__COVERITY__)
344  Word() : m_whole(0) {}
345 #elif !defined(NDEBUG)
346  // Repeating pattern of 1010 for debug builds to break things...
347  Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
348 #else
349  Word() {}
350 #endif
351 
352  Word(word value) : m_whole(value) {}
353  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
354 
355  static Word Multiply(hword a, hword b)
356  {
357  Word r;
358  r.m_whole = (word)a * b;
359  return r;
360  }
361 
362  Word operator-(Word a)
363  {
364  Word r;
365  r.m_whole = m_whole - a.m_whole;
366  return r;
367  }
368 
369  Word operator-(hword a)
370  {
371  Word r;
372  r.m_whole = m_whole - a;
373  return r;
374  }
375 
376  // returns quotient, which must fit in a word
377  hword operator/(hword divisor)
378  {
379  return hword(m_whole / divisor);
380  }
381 
382  bool operator!() const
383  {
384  return !m_whole;
385  }
386 
387  word GetWhole() const {return m_whole;}
388  hword GetLowHalf() const {return hword(m_whole);}
389  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
390  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
391 
392 private:
393  word m_whole;
394 };
395 
396 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
397 template <class S, class D>
398 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
399 {
400  CRYPTOPP_UNUSED(dummy);
401 
402  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
403  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
404 
405  // estimate the quotient: do a 2 S by 1 S divide
406  S Q;
407  if (S(B1+1) == 0)
408  Q = A[2];
409  else if (B1 > 0)
410  Q = D(A[1], A[2]) / S(B1+1);
411  else
412  Q = D(A[0], A[1]) / B0;
413 
414  // now subtract Q*B from A
415  D p = D::Multiply(B0, Q);
416  D u = (D) A[0] - p.GetLowHalf();
417  A[0] = u.GetLowHalf();
418  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
419  A[1] = u.GetLowHalf();
420  A[2] += u.GetHighHalf();
421 
422  // Q <= actual quotient, so fix it
423  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
424  {
425  u = (D) A[0] - B0;
426  A[0] = u.GetLowHalf();
427  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
428  A[1] = u.GetLowHalf();
429  A[2] += u.GetHighHalf();
430  Q++;
431  assert(Q); // shouldn't overflow
432  }
433 
434  return Q;
435 }
436 
437 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
438 template <class S, class D>
439 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
440 {
441  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
442  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
443  else
444  {
445  S Q[2];
446  T[0] = Al.GetLowHalf();
447  T[1] = Al.GetHighHalf();
448  T[2] = Ah.GetLowHalf();
449  T[3] = Ah.GetHighHalf();
450  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
451  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
452  return D(Q[0], Q[1]);
453  }
454 }
455 
456 // returns quotient, which must fit in a word
457 inline word DWord::operator/(word a)
458 {
459  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
460  return word(m_whole / a);
461  #else
462  hword r[4];
463  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
464  #endif
465 }
466 
467 inline word DWord::operator%(word a)
468 {
469  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
470  return word(m_whole % a);
471  #else
472  if (a < (word(1) << (WORD_BITS/2)))
473  {
474  hword h = hword(a);
475  word r = m_halfs.high % h;
476  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
477  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
478  }
479  else
480  {
481  hword r[4];
482  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
483  return Word(r[0], r[1]).GetWhole();
484  }
485  #endif
486 }
487 
488 // ********************************************************
489 
490 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
491 #if defined(__GNUC__)
492  #define AddPrologue \
493  int result; \
494  __asm__ __volatile__ \
495  ( \
496  INTEL_NOPREFIX
497  #define AddEpilogue \
498  ATT_PREFIX \
499  : "=a" (result)\
500  : "d" (C), "a" (A), "D" (B), "c" (N) \
501  : "%esi", "memory", "cc" \
502  );\
503  return result;
504  #define MulPrologue \
505  __asm__ __volatile__ \
506  ( \
507  INTEL_NOPREFIX \
508  AS1( push ebx) \
509  AS2( mov ebx, edx)
510  #define MulEpilogue \
511  AS1( pop ebx) \
512  ATT_PREFIX \
513  : \
514  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
515  : "%esi", "memory", "cc" \
516  );
517  #define SquPrologue MulPrologue
518  #define SquEpilogue \
519  AS1( pop ebx) \
520  ATT_PREFIX \
521  : \
522  : "d" (s_maskLow16), "c" (C), "a" (A) \
523  : "%esi", "%edi", "memory", "cc" \
524  );
525  #define TopPrologue MulPrologue
526  #define TopEpilogue \
527  AS1( pop ebx) \
528  ATT_PREFIX \
529  : \
530  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
531  : "memory", "cc" \
532  );
533 #else
534  #define AddPrologue \
535  __asm push edi \
536  __asm push esi \
537  __asm mov eax, [esp+12] \
538  __asm mov edi, [esp+16]
539  #define AddEpilogue \
540  __asm pop esi \
541  __asm pop edi \
542  __asm ret 8
543 #if _MSC_VER < 1300
544  #define SaveEBX __asm push ebx
545  #define RestoreEBX __asm pop ebx
546 #else
547  #define SaveEBX
548  #define RestoreEBX
549 #endif
550  #define SquPrologue \
551  AS2( mov eax, A) \
552  AS2( mov ecx, C) \
553  SaveEBX \
554  AS2( lea ebx, s_maskLow16)
555  #define MulPrologue \
556  AS2( mov eax, A) \
557  AS2( mov edi, B) \
558  AS2( mov ecx, C) \
559  SaveEBX \
560  AS2( lea ebx, s_maskLow16)
561  #define TopPrologue \
562  AS2( mov eax, A) \
563  AS2( mov edi, B) \
564  AS2( mov ecx, C) \
565  AS2( mov esi, L) \
566  SaveEBX \
567  AS2( lea ebx, s_maskLow16)
568  #define SquEpilogue RestoreEBX
569  #define MulEpilogue RestoreEBX
570  #define TopEpilogue RestoreEBX
571 #endif
572 
573 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
574 extern "C" {
575 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
576 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
577 }
578 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
579 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
580 {
581  word result;
582  __asm__ __volatile__
583  (
584  INTEL_NOPREFIX
585  AS1( neg %1)
586  ASJ( jz, 1, f)
587  AS2( mov %0,[%3+8*%1])
588  AS2( add %0,[%4+8*%1])
589  AS2( mov [%2+8*%1],%0)
590  ASL(0)
591  AS2( mov %0,[%3+8*%1+8])
592  AS2( adc %0,[%4+8*%1+8])
593  AS2( mov [%2+8*%1+8],%0)
594  AS2( lea %1,[%1+2])
595  ASJ( jrcxz, 1, f)
596  AS2( mov %0,[%3+8*%1])
597  AS2( adc %0,[%4+8*%1])
598  AS2( mov [%2+8*%1],%0)
599  ASJ( jmp, 0, b)
600  ASL(1)
601  AS2( mov %0, 0)
602  AS2( adc %0, %0)
603  ATT_NOPREFIX
604  : "=&r" (result), "+c" (N)
605  : "r" (C+N), "r" (A+N), "r" (B+N)
606  : "memory", "cc"
607  );
608  return (int)result;
609 }
610 
611 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
612 {
613  word result;
614  __asm__ __volatile__
615  (
616  INTEL_NOPREFIX
617  AS1( neg %1)
618  ASJ( jz, 1, f)
619  AS2( mov %0,[%3+8*%1])
620  AS2( sub %0,[%4+8*%1])
621  AS2( mov [%2+8*%1],%0)
622  ASL(0)
623  AS2( mov %0,[%3+8*%1+8])
624  AS2( sbb %0,[%4+8*%1+8])
625  AS2( mov [%2+8*%1+8],%0)
626  AS2( lea %1,[%1+2])
627  ASJ( jrcxz, 1, f)
628  AS2( mov %0,[%3+8*%1])
629  AS2( sbb %0,[%4+8*%1])
630  AS2( mov [%2+8*%1],%0)
631  ASJ( jmp, 0, b)
632  ASL(1)
633  AS2( mov %0, 0)
634  AS2( adc %0, %0)
635  ATT_NOPREFIX
636  : "=&r" (result), "+c" (N)
637  : "r" (C+N), "r" (A+N), "r" (B+N)
638  : "memory", "cc"
639  );
640  return (int)result;
641 }
642 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
643 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
644 {
645  AddPrologue
646 
647  // now: eax = A, edi = B, edx = C, ecx = N
648  AS2( lea eax, [eax+4*ecx])
649  AS2( lea edi, [edi+4*ecx])
650  AS2( lea edx, [edx+4*ecx])
651 
652  AS1( neg ecx) // ecx is negative index
653  AS2( test ecx, 2) // this clears carry flag
654  ASJ( jz, 0, f)
655  AS2( sub ecx, 2)
656  ASJ( jmp, 1, f)
657 
658  ASL(0)
659  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
660  AS2( mov esi,[eax+4*ecx])
661  AS2( adc esi,[edi+4*ecx])
662  AS2( mov [edx+4*ecx],esi)
663  AS2( mov esi,[eax+4*ecx+4])
664  AS2( adc esi,[edi+4*ecx+4])
665  AS2( mov [edx+4*ecx+4],esi)
666  ASL(1)
667  AS2( mov esi,[eax+4*ecx+8])
668  AS2( adc esi,[edi+4*ecx+8])
669  AS2( mov [edx+4*ecx+8],esi)
670  AS2( mov esi,[eax+4*ecx+12])
671  AS2( adc esi,[edi+4*ecx+12])
672  AS2( mov [edx+4*ecx+12],esi)
673 
674  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
675  ASJ( jmp, 0, b)
676 
677  ASL(2)
678  AS2( mov eax, 0)
679  AS1( setc al) // store carry into eax (return result register)
680 
681  AddEpilogue
682 }
683 
684 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
685 {
686  AddPrologue
687 
688  // now: eax = A, edi = B, edx = C, ecx = N
689  AS2( lea eax, [eax+4*ecx])
690  AS2( lea edi, [edi+4*ecx])
691  AS2( lea edx, [edx+4*ecx])
692 
693  AS1( neg ecx) // ecx is negative index
694  AS2( test ecx, 2) // this clears carry flag
695  ASJ( jz, 0, f)
696  AS2( sub ecx, 2)
697  ASJ( jmp, 1, f)
698 
699  ASL(0)
700  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
701  AS2( mov esi,[eax+4*ecx])
702  AS2( sbb esi,[edi+4*ecx])
703  AS2( mov [edx+4*ecx],esi)
704  AS2( mov esi,[eax+4*ecx+4])
705  AS2( sbb esi,[edi+4*ecx+4])
706  AS2( mov [edx+4*ecx+4],esi)
707  ASL(1)
708  AS2( mov esi,[eax+4*ecx+8])
709  AS2( sbb esi,[edi+4*ecx+8])
710  AS2( mov [edx+4*ecx+8],esi)
711  AS2( mov esi,[eax+4*ecx+12])
712  AS2( sbb esi,[edi+4*ecx+12])
713  AS2( mov [edx+4*ecx+12],esi)
714 
715  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
716  ASJ( jmp, 0, b)
717 
718  ASL(2)
719  AS2( mov eax, 0)
720  AS1( setc al) // store carry into eax (return result register)
721 
722  AddEpilogue
723 }
724 
725 #if CRYPTOPP_INTEGER_SSE2
726 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
727 {
728  AddPrologue
729 
730  // now: eax = A, edi = B, edx = C, ecx = N
731  AS2( lea eax, [eax+4*ecx])
732  AS2( lea edi, [edi+4*ecx])
733  AS2( lea edx, [edx+4*ecx])
734 
735  AS1( neg ecx) // ecx is negative index
736  AS2( pxor mm2, mm2)
737  ASJ( jz, 2, f)
738  AS2( test ecx, 2) // this clears carry flag
739  ASJ( jz, 0, f)
740  AS2( sub ecx, 2)
741  ASJ( jmp, 1, f)
742 
743  ASL(0)
744  AS2( movd mm0, DWORD PTR [eax+4*ecx])
745  AS2( movd mm1, DWORD PTR [edi+4*ecx])
746  AS2( paddq mm0, mm1)
747  AS2( paddq mm2, mm0)
748  AS2( movd DWORD PTR [edx+4*ecx], mm2)
749  AS2( psrlq mm2, 32)
750 
751  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
752  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
753  AS2( paddq mm0, mm1)
754  AS2( paddq mm2, mm0)
755  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
756  AS2( psrlq mm2, 32)
757 
758  ASL(1)
759  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
760  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
761  AS2( paddq mm0, mm1)
762  AS2( paddq mm2, mm0)
763  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
764  AS2( psrlq mm2, 32)
765 
766  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
767  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
768  AS2( paddq mm0, mm1)
769  AS2( paddq mm2, mm0)
770  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
771  AS2( psrlq mm2, 32)
772 
773  AS2( add ecx, 4)
774  ASJ( jnz, 0, b)
775 
776  ASL(2)
777  AS2( movd eax, mm2)
778  AS1( emms)
779 
780  AddEpilogue
781 }
782 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
783 {
784  AddPrologue
785 
786  // now: eax = A, edi = B, edx = C, ecx = N
787  AS2( lea eax, [eax+4*ecx])
788  AS2( lea edi, [edi+4*ecx])
789  AS2( lea edx, [edx+4*ecx])
790 
791  AS1( neg ecx) // ecx is negative index
792  AS2( pxor mm2, mm2)
793  ASJ( jz, 2, f)
794  AS2( test ecx, 2) // this clears carry flag
795  ASJ( jz, 0, f)
796  AS2( sub ecx, 2)
797  ASJ( jmp, 1, f)
798 
799  ASL(0)
800  AS2( movd mm0, DWORD PTR [eax+4*ecx])
801  AS2( movd mm1, DWORD PTR [edi+4*ecx])
802  AS2( psubq mm0, mm1)
803  AS2( psubq mm0, mm2)
804  AS2( movd DWORD PTR [edx+4*ecx], mm0)
805  AS2( psrlq mm0, 63)
806 
807  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
808  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
809  AS2( psubq mm2, mm1)
810  AS2( psubq mm2, mm0)
811  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
812  AS2( psrlq mm2, 63)
813 
814  ASL(1)
815  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
816  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
817  AS2( psubq mm0, mm1)
818  AS2( psubq mm0, mm2)
819  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
820  AS2( psrlq mm0, 63)
821 
822  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
823  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
824  AS2( psubq mm2, mm1)
825  AS2( psubq mm2, mm0)
826  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
827  AS2( psrlq mm2, 63)
828 
829  AS2( add ecx, 4)
830  ASJ( jnz, 0, b)
831 
832  ASL(2)
833  AS2( movd eax, mm2)
834  AS1( emms)
835 
836  AddEpilogue
837 }
838 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
839 #else
840 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
841 {
842  assert (N%2 == 0);
843 
844  Declare2Words(u);
845  AssignWord(u, 0);
846  for (size_t i=0; i<N; i+=2)
847  {
848  AddWithCarry(u, A[i], B[i]);
849  C[i] = LowWord(u);
850  AddWithCarry(u, A[i+1], B[i+1]);
851  C[i+1] = LowWord(u);
852  }
853  return int(GetCarry(u));
854 }
855 
856 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
857 {
858  assert (N%2 == 0);
859 
860  Declare2Words(u);
861  AssignWord(u, 0);
862  for (size_t i=0; i<N; i+=2)
863  {
864  SubtractWithBorrow(u, A[i], B[i]);
865  C[i] = LowWord(u);
866  SubtractWithBorrow(u, A[i+1], B[i+1]);
867  C[i+1] = LowWord(u);
868  }
869  return int(GetBorrow(u));
870 }
871 #endif
872 
873 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
874 {
875  // http://github.com/weidai11/cryptopp/issues/188
876  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
877 
878  word carry=0;
879  for(unsigned i=0; i<N; i++)
880  {
881  Declare2Words(p);
882  MultiplyWords(p, A[i], B);
883  Acc2WordsBy1(p, carry);
884  C[i] = LowWord(p);
885  carry = HighWord(p);
886  }
887  return carry;
888 }
889 
890 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
891 
892 #define Mul_2 \
893  Mul_Begin(2) \
894  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
895  Mul_End(1, 1)
896 
897 #define Mul_4 \
898  Mul_Begin(4) \
899  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
900  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
901  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
902  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
903  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
904  Mul_End(5, 3)
905 
906 #define Mul_8 \
907  Mul_Begin(8) \
908  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
909  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
910  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
911  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
912  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
913  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
914  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
915  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
916  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
917  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
918  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
919  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
920  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
921  Mul_End(13, 7)
922 
923 #define Mul_16 \
924  Mul_Begin(16) \
925  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
926  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
927  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
928  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
929  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
930  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
931  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
932  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
933  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
934  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
935  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
936  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
937  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
938  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
939  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
940  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
941  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
942  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
943  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
944  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
945  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
946  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
947  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
948  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
949  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
950  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
951  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
952  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
953  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
954  Mul_End(29, 15)
955 
956 #define Squ_2 \
957  Squ_Begin(2) \
958  Squ_End(2)
959 
960 #define Squ_4 \
961  Squ_Begin(4) \
962  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
963  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
964  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
965  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
966  Squ_End(4)
967 
968 #define Squ_8 \
969  Squ_Begin(8) \
970  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
971  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
972  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
973  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
974  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
975  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
976  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
977  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
978  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
979  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
980  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
981  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
982  Squ_End(8)
983 
984 #define Squ_16 \
985  Squ_Begin(16) \
986  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
987  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
988  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
989  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
990  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
991  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
992  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
993  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
994  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
995  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
996  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
997  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
998  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
999  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1000  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1001  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1002  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1003  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1004  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1005  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1006  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1007  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1008  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1009  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1010  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1011  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1012  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1013  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1014  Squ_End(16)
1015 
1016 #define Bot_2 \
1017  Mul_Begin(2) \
1018  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1019  Bot_End(2)
1020 
1021 #define Bot_4 \
1022  Mul_Begin(4) \
1023  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1024  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1025  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1026  Bot_End(4)
1027 
1028 #define Bot_8 \
1029  Mul_Begin(8) \
1030  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1031  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1032  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1033  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1034  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1035  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1036  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1037  Bot_End(8)
1038 
1039 #define Bot_16 \
1040  Mul_Begin(16) \
1041  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1042  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1043  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1044  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1045  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1046  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1047  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1048  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1049  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1050  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1051  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1052  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1053  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1054  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1055  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1056  Bot_End(16)
1057 
1058 #endif
1059 
1060 #if 0
1061 #define Mul_Begin(n) \
1062  Declare2Words(p) \
1063  Declare2Words(c) \
1064  Declare2Words(d) \
1065  MultiplyWords(p, A[0], B[0]) \
1066  AssignWord(c, LowWord(p)) \
1067  AssignWord(d, HighWord(p))
1068 
1069 #define Mul_Acc(i, j) \
1070  MultiplyWords(p, A[i], B[j]) \
1071  Acc2WordsBy1(c, LowWord(p)) \
1072  Acc2WordsBy1(d, HighWord(p))
1073 
1074 #define Mul_SaveAcc(k, i, j) \
1075  R[k] = LowWord(c); \
1076  Add2WordsBy1(c, d, HighWord(c)) \
1077  MultiplyWords(p, A[i], B[j]) \
1078  AssignWord(d, HighWord(p)) \
1079  Acc2WordsBy1(c, LowWord(p))
1080 
1081 #define Mul_End(n) \
1082  R[2*n-3] = LowWord(c); \
1083  Acc2WordsBy1(d, HighWord(c)) \
1084  MultiplyWords(p, A[n-1], B[n-1])\
1085  Acc2WordsBy2(d, p) \
1086  R[2*n-2] = LowWord(d); \
1087  R[2*n-1] = HighWord(d);
1088 
1089 #define Bot_SaveAcc(k, i, j) \
1090  R[k] = LowWord(c); \
1091  word e = LowWord(d) + HighWord(c); \
1092  e += A[i] * B[j];
1093 
1094 #define Bot_Acc(i, j) \
1095  e += A[i] * B[j];
1096 
1097 #define Bot_End(n) \
1098  R[n-1] = e;
1099 #else
1100 #define Mul_Begin(n) \
1101  Declare2Words(p) \
1102  word c; \
1103  Declare2Words(d) \
1104  MultiplyWords(p, A[0], B[0]) \
1105  c = LowWord(p); \
1106  AssignWord(d, HighWord(p))
1107 
1108 #define Mul_Acc(i, j) \
1109  MulAcc(c, d, A[i], B[j])
1110 
1111 #define Mul_SaveAcc(k, i, j) \
1112  R[k] = c; \
1113  c = LowWord(d); \
1114  AssignWord(d, HighWord(d)) \
1115  MulAcc(c, d, A[i], B[j])
1116 
1117 #define Mul_End(k, i) \
1118  R[k] = c; \
1119  MultiplyWords(p, A[i], B[i]) \
1120  Acc2WordsBy2(p, d) \
1121  R[k+1] = LowWord(p); \
1122  R[k+2] = HighWord(p);
1123 
1124 #define Bot_SaveAcc(k, i, j) \
1125  R[k] = c; \
1126  c = LowWord(d); \
1127  c += A[i] * B[j];
1128 
1129 #define Bot_Acc(i, j) \
1130  c += A[i] * B[j];
1131 
1132 #define Bot_End(n) \
1133  R[n-1] = c;
1134 #endif
1135 
1136 #define Squ_Begin(n) \
1137  Declare2Words(p) \
1138  word c; \
1139  Declare2Words(d) \
1140  Declare2Words(e) \
1141  MultiplyWords(p, A[0], A[0]) \
1142  R[0] = LowWord(p); \
1143  AssignWord(e, HighWord(p)) \
1144  MultiplyWords(p, A[0], A[1]) \
1145  c = LowWord(p); \
1146  AssignWord(d, HighWord(p)) \
1147  Squ_NonDiag \
1148 
1149 #define Squ_NonDiag \
1150  Double3Words(c, d)
1151 
1152 #define Squ_SaveAcc(k, i, j) \
1153  Acc3WordsBy2(c, d, e) \
1154  R[k] = c; \
1155  MultiplyWords(p, A[i], A[j]) \
1156  c = LowWord(p); \
1157  AssignWord(d, HighWord(p)) \
1158 
1159 #define Squ_Acc(i, j) \
1160  MulAcc(c, d, A[i], A[j])
1161 
1162 #define Squ_Diag(i) \
1163  Squ_NonDiag \
1164  MulAcc(c, d, A[i], A[i])
1165 
1166 #define Squ_End(n) \
1167  Acc3WordsBy2(c, d, e) \
1168  R[2*n-3] = c; \
1169  MultiplyWords(p, A[n-1], A[n-1])\
1170  Acc2WordsBy2(p, e) \
1171  R[2*n-2] = LowWord(p); \
1172  R[2*n-1] = HighWord(p);
1173 
1174 
1175 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1176 {
1177  // http://github.com/weidai11/cryptopp/issues/188
1178  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1179  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1180 
1181  Mul_2
1182 }
1183 
1184 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1185 {
1186  // http://github.com/weidai11/cryptopp/issues/188
1187  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1188  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1189 
1190  Mul_4
1191 }
1192 
1193 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1194 {
1195  // http://github.com/weidai11/cryptopp/issues/188
1196  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1197  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1198 
1199  Mul_8
1200 }
1201 
1202 void Baseline_Square2(word *R, const word *AA)
1203 {
1204  // http://github.com/weidai11/cryptopp/issues/188
1205  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1206 
1207  Squ_2
1208 }
1209 
1210 void Baseline_Square4(word *R, const word *AA)
1211 {
1212  // http://github.com/weidai11/cryptopp/issues/188
1213  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1214 
1215  Squ_4
1216 }
1217 
1218 void Baseline_Square8(word *R, const word *AA)
1219 {
1220  // http://github.com/weidai11/cryptopp/issues/188
1221  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1222 
1223  Squ_8
1224 }
1225 
1226 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1227 {
1228  // http://github.com/weidai11/cryptopp/issues/188
1229  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1230  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1231 
1232  Bot_2
1233 }
1234 
1235 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1236 {
1237  // http://github.com/weidai11/cryptopp/issues/188
1238  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1239  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1240 
1241  Bot_4
1242 }
1243 
1244 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1245 {
1246  // http://github.com/weidai11/cryptopp/issues/188
1247  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1248  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1249 
1250  Bot_8
1251 }
1252 
1253 #define Top_Begin(n) \
1254  Declare2Words(p) \
1255  word c; \
1256  Declare2Words(d) \
1257  MultiplyWords(p, A[0], B[n-2]);\
1258  AssignWord(d, HighWord(p));
1259 
1260 #define Top_Acc(i, j) \
1261  MultiplyWords(p, A[i], B[j]);\
1262  Acc2WordsBy1(d, HighWord(p));
1263 
1264 #define Top_SaveAcc0(i, j) \
1265  c = LowWord(d); \
1266  AssignWord(d, HighWord(d)) \
1267  MulAcc(c, d, A[i], B[j])
1268 
1269 #define Top_SaveAcc1(i, j) \
1270  c = L<c; \
1271  Acc2WordsBy1(d, c); \
1272  c = LowWord(d); \
1273  AssignWord(d, HighWord(d)) \
1274  MulAcc(c, d, A[i], B[j])
1275 
1276 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1277 {
1278  CRYPTOPP_UNUSED(L);
1279  word T[4];
1280  Baseline_Multiply2(T, A, B);
1281  R[0] = T[2];
1282  R[1] = T[3];
1283 }
1284 
1285 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1286 {
1287  // http://github.com/weidai11/cryptopp/issues/188
1288  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1289  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1290 
1291  Top_Begin(4)
1292  Top_Acc(1, 1) Top_Acc(2, 0) \
1293  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1294  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1295  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1296  Mul_End(1, 3)
1297 }
1298 
1299 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1300 {
1301  // http://github.com/weidai11/cryptopp/issues/188
1302  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1303  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1304 
1305  Top_Begin(8)
1306  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1307  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1308  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1309  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1310  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1311  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1312  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1313  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1314  Mul_End(5, 7)
1315 }
1316 
1317 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1318 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1319 {
1320  // http://github.com/weidai11/cryptopp/issues/188
1321  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1322  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1323 
1324  Mul_16
1325 }
1326 
1327 void Baseline_Square16(word *R, const word *AA)
1328 {
1329  // http://github.com/weidai11/cryptopp/issues/188
1330  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1331 
1332  Squ_16
1333 }
1334 
1335 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1336 {
1337  // http://github.com/weidai11/cryptopp/issues/188
1338  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1339  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1340 
1341  Bot_16
1342 }
1343 
1344 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1345 {
1346  // http://github.com/weidai11/cryptopp/issues/188
1347  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1348  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1349 
1350  Top_Begin(16)
1351  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1352  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1353  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1354  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1355  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1356  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1357  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1358  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1359  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1360  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1361  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1362  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1363  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1364  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1365  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1366  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1367  Mul_End(13, 15)
1368 }
1369 #endif
1370 
1371 // ********************************************************
1372 
1373 #if CRYPTOPP_INTEGER_SSE2
1374 
1375 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1376 
1377 #undef Mul_Begin
1378 #undef Mul_Acc
1379 #undef Top_Begin
1380 #undef Top_Acc
1381 #undef Squ_Acc
1382 #undef Squ_NonDiag
1383 #undef Squ_Diag
1384 #undef Squ_SaveAcc
1385 #undef Squ_Begin
1386 #undef Mul_SaveAcc
1387 #undef Bot_Acc
1388 #undef Bot_SaveAcc
1389 #undef Bot_End
1390 #undef Squ_End
1391 #undef Mul_End
1392 
1393 #define SSE2_FinalSave(k) \
1394  AS2( psllq xmm5, 16) \
1395  AS2( paddq xmm4, xmm5) \
1396  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1397 
1398 #define SSE2_SaveShift(k) \
1399  AS2( movq xmm0, xmm6) \
1400  AS2( punpckhqdq xmm6, xmm0) \
1401  AS2( movq xmm1, xmm7) \
1402  AS2( punpckhqdq xmm7, xmm1) \
1403  AS2( paddd xmm6, xmm0) \
1404  AS2( pslldq xmm6, 4) \
1405  AS2( paddd xmm7, xmm1) \
1406  AS2( paddd xmm4, xmm6) \
1407  AS2( pslldq xmm7, 4) \
1408  AS2( movq xmm6, xmm4) \
1409  AS2( paddd xmm5, xmm7) \
1410  AS2( movq xmm7, xmm5) \
1411  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1412  AS2( psrlq xmm6, 16) \
1413  AS2( paddq xmm6, xmm7) \
1414  AS2( punpckhqdq xmm4, xmm0) \
1415  AS2( punpckhqdq xmm5, xmm0) \
1416  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1417  AS2( psrlq xmm6, 3*16) \
1418  AS2( paddd xmm4, xmm6) \
1419 
1420 #define Squ_SSE2_SaveShift(k) \
1421  AS2( movq xmm0, xmm6) \
1422  AS2( punpckhqdq xmm6, xmm0) \
1423  AS2( movq xmm1, xmm7) \
1424  AS2( punpckhqdq xmm7, xmm1) \
1425  AS2( paddd xmm6, xmm0) \
1426  AS2( pslldq xmm6, 4) \
1427  AS2( paddd xmm7, xmm1) \
1428  AS2( paddd xmm4, xmm6) \
1429  AS2( pslldq xmm7, 4) \
1430  AS2( movhlps xmm6, xmm4) \
1431  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1432  AS2( paddd xmm5, xmm7) \
1433  AS2( movhps QWORD PTR [esp+12], xmm5)\
1434  AS2( psrlq xmm4, 16) \
1435  AS2( paddq xmm4, xmm5) \
1436  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1437  AS2( psrlq xmm4, 3*16) \
1438  AS2( paddd xmm4, xmm6) \
1439  AS2( movq QWORD PTR [esp+4], xmm4)\
1440 
1441 #define SSE2_FirstMultiply(i) \
1442  AS2( movdqa xmm7, [esi+(i)*16])\
1443  AS2( movdqa xmm5, [edi-(i)*16])\
1444  AS2( pmuludq xmm5, xmm7) \
1445  AS2( movdqa xmm4, [ebx])\
1446  AS2( movdqa xmm6, xmm4) \
1447  AS2( pand xmm4, xmm5) \
1448  AS2( psrld xmm5, 16) \
1449  AS2( pmuludq xmm7, [edx-(i)*16])\
1450  AS2( pand xmm6, xmm7) \
1451  AS2( psrld xmm7, 16)
1452 
1453 #define Squ_Begin(n) \
1454  SquPrologue \
1455  AS2( mov esi, esp)\
1456  AS2( and esp, 0xfffffff0)\
1457  AS2( lea edi, [esp-32*n])\
1458  AS2( sub esp, 32*n+16)\
1459  AS1( push esi)\
1460  AS2( mov esi, edi) \
1461  AS2( xor edx, edx) \
1462  ASL(1) \
1463  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1464  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1465  AS2( movdqa [edi+2*edx], xmm0) \
1466  AS2( psrlq xmm0, 32) \
1467  AS2( movdqa [edi+2*edx+16], xmm0) \
1468  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1469  AS2( psrlq xmm1, 32) \
1470  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1471  AS2( add edx, 16) \
1472  AS2( cmp edx, 8*(n)) \
1473  ASJ( jne, 1, b) \
1474  AS2( lea edx, [edi+16*n])\
1475  SSE2_FirstMultiply(0) \
1476 
1477 #define Squ_Acc(i) \
1478  ASL(LSqu##i) \
1479  AS2( movdqa xmm1, [esi+(i)*16]) \
1480  AS2( movdqa xmm0, [edi-(i)*16]) \
1481  AS2( movdqa xmm2, [ebx]) \
1482  AS2( pmuludq xmm0, xmm1) \
1483  AS2( pmuludq xmm1, [edx-(i)*16]) \
1484  AS2( movdqa xmm3, xmm2) \
1485  AS2( pand xmm2, xmm0) \
1486  AS2( psrld xmm0, 16) \
1487  AS2( paddd xmm4, xmm2) \
1488  AS2( paddd xmm5, xmm0) \
1489  AS2( pand xmm3, xmm1) \
1490  AS2( psrld xmm1, 16) \
1491  AS2( paddd xmm6, xmm3) \
1492  AS2( paddd xmm7, xmm1) \
1493 
1494 #define Squ_Acc1(i)
1495 #define Squ_Acc2(i) ASC(call, LSqu##i)
1496 #define Squ_Acc3(i) Squ_Acc2(i)
1497 #define Squ_Acc4(i) Squ_Acc2(i)
1498 #define Squ_Acc5(i) Squ_Acc2(i)
1499 #define Squ_Acc6(i) Squ_Acc2(i)
1500 #define Squ_Acc7(i) Squ_Acc2(i)
1501 #define Squ_Acc8(i) Squ_Acc2(i)
1502 
1503 #define SSE2_End(E, n) \
1504  SSE2_SaveShift(2*(n)-3) \
1505  AS2( movdqa xmm7, [esi+16]) \
1506  AS2( movdqa xmm0, [edi]) \
1507  AS2( pmuludq xmm0, xmm7) \
1508  AS2( movdqa xmm2, [ebx]) \
1509  AS2( pmuludq xmm7, [edx]) \
1510  AS2( movdqa xmm6, xmm2) \
1511  AS2( pand xmm2, xmm0) \
1512  AS2( psrld xmm0, 16) \
1513  AS2( paddd xmm4, xmm2) \
1514  AS2( paddd xmm5, xmm0) \
1515  AS2( pand xmm6, xmm7) \
1516  AS2( psrld xmm7, 16) \
1517  SSE2_SaveShift(2*(n)-2) \
1518  SSE2_FinalSave(2*(n)-1) \
1519  AS1( pop esp)\
1520  E
1521 
1522 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1523 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1524 #define Top_End(n) SSE2_End(TopEpilogue, n)
1525 
1526 #define Squ_Column1(k, i) \
1527  Squ_SSE2_SaveShift(k) \
1528  AS2( add esi, 16) \
1529  SSE2_FirstMultiply(1)\
1530  Squ_Acc##i(i) \
1531  AS2( paddd xmm4, xmm4) \
1532  AS2( paddd xmm5, xmm5) \
1533  AS2( movdqa xmm3, [esi]) \
1534  AS2( movq xmm1, QWORD PTR [esi+8]) \
1535  AS2( pmuludq xmm1, xmm3) \
1536  AS2( pmuludq xmm3, xmm3) \
1537  AS2( movdqa xmm0, [ebx])\
1538  AS2( movdqa xmm2, xmm0) \
1539  AS2( pand xmm0, xmm1) \
1540  AS2( psrld xmm1, 16) \
1541  AS2( paddd xmm6, xmm0) \
1542  AS2( paddd xmm7, xmm1) \
1543  AS2( pand xmm2, xmm3) \
1544  AS2( psrld xmm3, 16) \
1545  AS2( paddd xmm6, xmm6) \
1546  AS2( paddd xmm7, xmm7) \
1547  AS2( paddd xmm4, xmm2) \
1548  AS2( paddd xmm5, xmm3) \
1549  AS2( movq xmm0, QWORD PTR [esp+4])\
1550  AS2( movq xmm1, QWORD PTR [esp+12])\
1551  AS2( paddd xmm4, xmm0)\
1552  AS2( paddd xmm5, xmm1)\
1553 
1554 #define Squ_Column0(k, i) \
1555  Squ_SSE2_SaveShift(k) \
1556  AS2( add edi, 16) \
1557  AS2( add edx, 16) \
1558  SSE2_FirstMultiply(1)\
1559  Squ_Acc##i(i) \
1560  AS2( paddd xmm6, xmm6) \
1561  AS2( paddd xmm7, xmm7) \
1562  AS2( paddd xmm4, xmm4) \
1563  AS2( paddd xmm5, xmm5) \
1564  AS2( movq xmm0, QWORD PTR [esp+4])\
1565  AS2( movq xmm1, QWORD PTR [esp+12])\
1566  AS2( paddd xmm4, xmm0)\
1567  AS2( paddd xmm5, xmm1)\
1568 
1569 #define SSE2_MulAdd45 \
1570  AS2( movdqa xmm7, [esi]) \
1571  AS2( movdqa xmm0, [edi]) \
1572  AS2( pmuludq xmm0, xmm7) \
1573  AS2( movdqa xmm2, [ebx]) \
1574  AS2( pmuludq xmm7, [edx]) \
1575  AS2( movdqa xmm6, xmm2) \
1576  AS2( pand xmm2, xmm0) \
1577  AS2( psrld xmm0, 16) \
1578  AS2( paddd xmm4, xmm2) \
1579  AS2( paddd xmm5, xmm0) \
1580  AS2( pand xmm6, xmm7) \
1581  AS2( psrld xmm7, 16)
1582 
1583 #define Mul_Begin(n) \
1584  MulPrologue \
1585  AS2( mov esi, esp)\
1586  AS2( and esp, 0xfffffff0)\
1587  AS2( sub esp, 48*n+16)\
1588  AS1( push esi)\
1589  AS2( xor edx, edx) \
1590  ASL(1) \
1591  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1592  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1593  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1594  AS2( movdqa [esp+20+2*edx], xmm0) \
1595  AS2( psrlq xmm0, 32) \
1596  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1597  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1598  AS2( psrlq xmm1, 32) \
1599  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1600  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1601  AS2( psrlq xmm2, 32) \
1602  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1603  AS2( add edx, 16) \
1604  AS2( cmp edx, 8*(n)) \
1605  ASJ( jne, 1, b) \
1606  AS2( lea edi, [esp+20])\
1607  AS2( lea edx, [esp+20+16*n])\
1608  AS2( lea esi, [esp+20+32*n])\
1609  SSE2_FirstMultiply(0) \
1610 
1611 #define Mul_Acc(i) \
1612  ASL(LMul##i) \
1613  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1614  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1615  AS2( movdqa xmm2, [ebx]) \
1616  AS2( pmuludq xmm0, xmm1) \
1617  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1618  AS2( movdqa xmm3, xmm2) \
1619  AS2( pand xmm2, xmm0) \
1620  AS2( psrld xmm0, 16) \
1621  AS2( paddd xmm4, xmm2) \
1622  AS2( paddd xmm5, xmm0) \
1623  AS2( pand xmm3, xmm1) \
1624  AS2( psrld xmm1, 16) \
1625  AS2( paddd xmm6, xmm3) \
1626  AS2( paddd xmm7, xmm1) \
1627 
1628 #define Mul_Acc1(i)
1629 #define Mul_Acc2(i) ASC(call, LMul##i)
1630 #define Mul_Acc3(i) Mul_Acc2(i)
1631 #define Mul_Acc4(i) Mul_Acc2(i)
1632 #define Mul_Acc5(i) Mul_Acc2(i)
1633 #define Mul_Acc6(i) Mul_Acc2(i)
1634 #define Mul_Acc7(i) Mul_Acc2(i)
1635 #define Mul_Acc8(i) Mul_Acc2(i)
1636 #define Mul_Acc9(i) Mul_Acc2(i)
1637 #define Mul_Acc10(i) Mul_Acc2(i)
1638 #define Mul_Acc11(i) Mul_Acc2(i)
1639 #define Mul_Acc12(i) Mul_Acc2(i)
1640 #define Mul_Acc13(i) Mul_Acc2(i)
1641 #define Mul_Acc14(i) Mul_Acc2(i)
1642 #define Mul_Acc15(i) Mul_Acc2(i)
1643 #define Mul_Acc16(i) Mul_Acc2(i)
1644 
1645 #define Mul_Column1(k, i) \
1646  SSE2_SaveShift(k) \
1647  AS2( add esi, 16) \
1648  SSE2_MulAdd45\
1649  Mul_Acc##i(i) \
1650 
1651 #define Mul_Column0(k, i) \
1652  SSE2_SaveShift(k) \
1653  AS2( add edi, 16) \
1654  AS2( add edx, 16) \
1655  SSE2_MulAdd45\
1656  Mul_Acc##i(i) \
1657 
1658 #define Bot_Acc(i) \
1659  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1660  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1661  AS2( pmuludq xmm0, xmm1) \
1662  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1663  AS2( paddq xmm4, xmm0) \
1664  AS2( paddd xmm6, xmm1)
1665 
1666 #define Bot_SaveAcc(k) \
1667  SSE2_SaveShift(k) \
1668  AS2( add edi, 16) \
1669  AS2( add edx, 16) \
1670  AS2( movdqa xmm6, [esi]) \
1671  AS2( movdqa xmm0, [edi]) \
1672  AS2( pmuludq xmm0, xmm6) \
1673  AS2( paddq xmm4, xmm0) \
1674  AS2( psllq xmm5, 16) \
1675  AS2( paddq xmm4, xmm5) \
1676  AS2( pmuludq xmm6, [edx])
1677 
1678 #define Bot_End(n) \
1679  AS2( movhlps xmm7, xmm6) \
1680  AS2( paddd xmm6, xmm7) \
1681  AS2( psllq xmm6, 32) \
1682  AS2( paddd xmm4, xmm6) \
1683  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1684  AS1( pop esp)\
1685  MulEpilogue
1686 
1687 #define Top_Begin(n) \
1688  TopPrologue \
1689  AS2( mov edx, esp)\
1690  AS2( and esp, 0xfffffff0)\
1691  AS2( sub esp, 48*n+16)\
1692  AS1( push edx)\
1693  AS2( xor edx, edx) \
1694  ASL(1) \
1695  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1696  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1697  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1698  AS2( movdqa [esp+20+2*edx], xmm0) \
1699  AS2( psrlq xmm0, 32) \
1700  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1701  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1702  AS2( psrlq xmm1, 32) \
1703  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1704  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1705  AS2( psrlq xmm2, 32) \
1706  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1707  AS2( add edx, 16) \
1708  AS2( cmp edx, 8*(n)) \
1709  ASJ( jne, 1, b) \
1710  AS2( mov eax, esi) \
1711  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1712  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1713  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1714  AS2( pxor xmm4, xmm4)\
1715  AS2( pxor xmm5, xmm5)
1716 
1717 #define Top_Acc(i) \
1718  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1719  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1720  AS2( psrlq xmm0, 48) \
1721  AS2( paddd xmm5, xmm0)\
1722 
1723 #define Top_Column0(i) \
1724  AS2( psllq xmm5, 32) \
1725  AS2( add edi, 16) \
1726  AS2( add edx, 16) \
1727  SSE2_MulAdd45\
1728  Mul_Acc##i(i) \
1729 
1730 #define Top_Column1(i) \
1731  SSE2_SaveShift(0) \
1732  AS2( add esi, 16) \
1733  SSE2_MulAdd45\
1734  Mul_Acc##i(i) \
1735  AS2( shr eax, 16) \
1736  AS2( movd xmm0, eax)\
1737  AS2( movd xmm1, [ecx+4])\
1738  AS2( psrld xmm1, 16)\
1739  AS2( pcmpgtd xmm1, xmm0)\
1740  AS2( psrld xmm1, 31)\
1741  AS2( paddd xmm4, xmm1)\
1742 
1743 void SSE2_Square4(word *C, const word *A)
1744 {
1745  Squ_Begin(2)
1746  Squ_Column0(0, 1)
1747  Squ_End(2)
1748 }
1749 
1750 void SSE2_Square8(word *C, const word *A)
1751 {
1752  Squ_Begin(4)
1753 #ifndef __GNUC__
1754  ASJ( jmp, 0, f)
1755  Squ_Acc(2)
1756  AS1( ret) ASL(0)
1757 #endif
1758  Squ_Column0(0, 1)
1759  Squ_Column1(1, 1)
1760  Squ_Column0(2, 2)
1761  Squ_Column1(3, 1)
1762  Squ_Column0(4, 1)
1763  Squ_End(4)
1764 }
1765 
1766 void SSE2_Square16(word *C, const word *A)
1767 {
1768  Squ_Begin(8)
1769 #ifndef __GNUC__
1770  ASJ( jmp, 0, f)
1771  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1772  AS1( ret) ASL(0)
1773 #endif
1774  Squ_Column0(0, 1)
1775  Squ_Column1(1, 1)
1776  Squ_Column0(2, 2)
1777  Squ_Column1(3, 2)
1778  Squ_Column0(4, 3)
1779  Squ_Column1(5, 3)
1780  Squ_Column0(6, 4)
1781  Squ_Column1(7, 3)
1782  Squ_Column0(8, 3)
1783  Squ_Column1(9, 2)
1784  Squ_Column0(10, 2)
1785  Squ_Column1(11, 1)
1786  Squ_Column0(12, 1)
1787  Squ_End(8)
1788 }
1789 
1790 void SSE2_Square32(word *C, const word *A)
1791 {
1792  Squ_Begin(16)
1793  ASJ( jmp, 0, f)
1794  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1795  AS1( ret) ASL(0)
1796  Squ_Column0(0, 1)
1797  Squ_Column1(1, 1)
1798  Squ_Column0(2, 2)
1799  Squ_Column1(3, 2)
1800  Squ_Column0(4, 3)
1801  Squ_Column1(5, 3)
1802  Squ_Column0(6, 4)
1803  Squ_Column1(7, 4)
1804  Squ_Column0(8, 5)
1805  Squ_Column1(9, 5)
1806  Squ_Column0(10, 6)
1807  Squ_Column1(11, 6)
1808  Squ_Column0(12, 7)
1809  Squ_Column1(13, 7)
1810  Squ_Column0(14, 8)
1811  Squ_Column1(15, 7)
1812  Squ_Column0(16, 7)
1813  Squ_Column1(17, 6)
1814  Squ_Column0(18, 6)
1815  Squ_Column1(19, 5)
1816  Squ_Column0(20, 5)
1817  Squ_Column1(21, 4)
1818  Squ_Column0(22, 4)
1819  Squ_Column1(23, 3)
1820  Squ_Column0(24, 3)
1821  Squ_Column1(25, 2)
1822  Squ_Column0(26, 2)
1823  Squ_Column1(27, 1)
1824  Squ_Column0(28, 1)
1825  Squ_End(16)
1826 }
1827 
1828 void SSE2_Multiply4(word *C, const word *A, const word *B)
1829 {
1830  Mul_Begin(2)
1831 #ifndef __GNUC__
1832  ASJ( jmp, 0, f)
1833  Mul_Acc(2)
1834  AS1( ret) ASL(0)
1835 #endif
1836  Mul_Column0(0, 2)
1837  Mul_End(2)
1838 }
1839 
1840 void SSE2_Multiply8(word *C, const word *A, const word *B)
1841 {
1842  Mul_Begin(4)
1843 #ifndef __GNUC__
1844  ASJ( jmp, 0, f)
1845  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1846  AS1( ret) ASL(0)
1847 #endif
1848  Mul_Column0(0, 2)
1849  Mul_Column1(1, 3)
1850  Mul_Column0(2, 4)
1851  Mul_Column1(3, 3)
1852  Mul_Column0(4, 2)
1853  Mul_End(4)
1854 }
1855 
1856 void SSE2_Multiply16(word *C, const word *A, const word *B)
1857 {
1858  Mul_Begin(8)
1859 #ifndef __GNUC__
1860  ASJ( jmp, 0, f)
1861  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1862  AS1( ret) ASL(0)
1863 #endif
1864  Mul_Column0(0, 2)
1865  Mul_Column1(1, 3)
1866  Mul_Column0(2, 4)
1867  Mul_Column1(3, 5)
1868  Mul_Column0(4, 6)
1869  Mul_Column1(5, 7)
1870  Mul_Column0(6, 8)
1871  Mul_Column1(7, 7)
1872  Mul_Column0(8, 6)
1873  Mul_Column1(9, 5)
1874  Mul_Column0(10, 4)
1875  Mul_Column1(11, 3)
1876  Mul_Column0(12, 2)
1877  Mul_End(8)
1878 }
1879 
1880 void SSE2_Multiply32(word *C, const word *A, const word *B)
1881 {
1882  Mul_Begin(16)
1883  ASJ( jmp, 0, f)
1884  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1885  AS1( ret) ASL(0)
1886  Mul_Column0(0, 2)
1887  Mul_Column1(1, 3)
1888  Mul_Column0(2, 4)
1889  Mul_Column1(3, 5)
1890  Mul_Column0(4, 6)
1891  Mul_Column1(5, 7)
1892  Mul_Column0(6, 8)
1893  Mul_Column1(7, 9)
1894  Mul_Column0(8, 10)
1895  Mul_Column1(9, 11)
1896  Mul_Column0(10, 12)
1897  Mul_Column1(11, 13)
1898  Mul_Column0(12, 14)
1899  Mul_Column1(13, 15)
1900  Mul_Column0(14, 16)
1901  Mul_Column1(15, 15)
1902  Mul_Column0(16, 14)
1903  Mul_Column1(17, 13)
1904  Mul_Column0(18, 12)
1905  Mul_Column1(19, 11)
1906  Mul_Column0(20, 10)
1907  Mul_Column1(21, 9)
1908  Mul_Column0(22, 8)
1909  Mul_Column1(23, 7)
1910  Mul_Column0(24, 6)
1911  Mul_Column1(25, 5)
1912  Mul_Column0(26, 4)
1913  Mul_Column1(27, 3)
1914  Mul_Column0(28, 2)
1915  Mul_End(16)
1916 }
1917 
1918 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1919 {
1920  Mul_Begin(2)
1921  Bot_SaveAcc(0) Bot_Acc(2)
1922  Bot_End(2)
1923 }
1924 
1925 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1926 {
1927  Mul_Begin(4)
1928 #ifndef __GNUC__
1929  ASJ( jmp, 0, f)
1930  Mul_Acc(3) Mul_Acc(2)
1931  AS1( ret) ASL(0)
1932 #endif
1933  Mul_Column0(0, 2)
1934  Mul_Column1(1, 3)
1935  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1936  Bot_End(4)
1937 }
1938 
1939 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1940 {
1941  Mul_Begin(8)
1942 #ifndef __GNUC__
1943  ASJ( jmp, 0, f)
1944  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1945  AS1( ret) ASL(0)
1946 #endif
1947  Mul_Column0(0, 2)
1948  Mul_Column1(1, 3)
1949  Mul_Column0(2, 4)
1950  Mul_Column1(3, 5)
1951  Mul_Column0(4, 6)
1952  Mul_Column1(5, 7)
1953  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1954  Bot_End(8)
1955 }
1956 
1957 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1958 {
1959  Mul_Begin(16)
1960 #ifndef __GNUC__
1961  ASJ( jmp, 0, f)
1962  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1963  AS1( ret) ASL(0)
1964 #endif
1965  Mul_Column0(0, 2)
1966  Mul_Column1(1, 3)
1967  Mul_Column0(2, 4)
1968  Mul_Column1(3, 5)
1969  Mul_Column0(4, 6)
1970  Mul_Column1(5, 7)
1971  Mul_Column0(6, 8)
1972  Mul_Column1(7, 9)
1973  Mul_Column0(8, 10)
1974  Mul_Column1(9, 11)
1975  Mul_Column0(10, 12)
1976  Mul_Column1(11, 13)
1977  Mul_Column0(12, 14)
1978  Mul_Column1(13, 15)
1979  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1980  Bot_End(16)
1981 }
1982 
1983 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1984 {
1985  Top_Begin(4)
1986  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1987 #ifndef __GNUC__
1988  ASJ( jmp, 0, f)
1989  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1990  AS1( ret) ASL(0)
1991 #endif
1992  Top_Column0(4)
1993  Top_Column1(3)
1994  Mul_Column0(0, 2)
1995  Top_End(2)
1996 }
1997 
1998 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
1999 {
2000  Top_Begin(8)
2001  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2002 #ifndef __GNUC__
2003  ASJ( jmp, 0, f)
2004  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2005  AS1( ret) ASL(0)
2006 #endif
2007  Top_Column0(8)
2008  Top_Column1(7)
2009  Mul_Column0(0, 6)
2010  Mul_Column1(1, 5)
2011  Mul_Column0(2, 4)
2012  Mul_Column1(3, 3)
2013  Mul_Column0(4, 2)
2014  Top_End(4)
2015 }
2016 
2017 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2018 {
2019  Top_Begin(16)
2020  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2021 #ifndef __GNUC__
2022  ASJ( jmp, 0, f)
2023  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2024  AS1( ret) ASL(0)
2025 #endif
2026  Top_Column0(16)
2027  Top_Column1(15)
2028  Mul_Column0(0, 14)
2029  Mul_Column1(1, 13)
2030  Mul_Column0(2, 12)
2031  Mul_Column1(3, 11)
2032  Mul_Column0(4, 10)
2033  Mul_Column1(5, 9)
2034  Mul_Column0(6, 8)
2035  Mul_Column1(7, 7)
2036  Mul_Column0(8, 6)
2037  Mul_Column1(9, 5)
2038  Mul_Column0(10, 4)
2039  Mul_Column1(11, 3)
2040  Mul_Column0(12, 2)
2041  Top_End(8)
2042 }
2043 
2044 #endif // #if CRYPTOPP_INTEGER_SSE2
2045 
2046 // ********************************************************
2047 
2048 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2049 typedef void (* PMul)(word *C, const word *A, const word *B);
2050 typedef void (* PSqu)(word *C, const word *A);
2051 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2052 
2053 #if CRYPTOPP_INTEGER_SSE2
2054 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2055 static size_t s_recursionLimit = 8;
2056 #else
2057 static const size_t s_recursionLimit = 16;
2058 #endif
2059 
2060 static PMul s_pMul[9], s_pBot[9];
2061 static PSqu s_pSqu[9];
2062 static PMulTop s_pTop[9];
2063 
2064 static void SetFunctionPointers()
2065 {
2066  s_pMul[0] = &Baseline_Multiply2;
2067  s_pBot[0] = &Baseline_MultiplyBottom2;
2068  s_pSqu[0] = &Baseline_Square2;
2069  s_pTop[0] = &Baseline_MultiplyTop2;
2070  s_pTop[1] = &Baseline_MultiplyTop4;
2071 
2072 #if CRYPTOPP_INTEGER_SSE2
2073  if (HasSSE2())
2074  {
2075 #if _MSC_VER != 1200 || defined(NDEBUG)
2076  if (IsP4())
2077  {
2078  s_pAdd = &SSE2_Add;
2079  s_pSub = &SSE2_Sub;
2080  }
2081 #endif
2082 
2083  s_recursionLimit = 32;
2084 
2085  s_pMul[1] = &SSE2_Multiply4;
2086  s_pMul[2] = &SSE2_Multiply8;
2087  s_pMul[4] = &SSE2_Multiply16;
2088  s_pMul[8] = &SSE2_Multiply32;
2089 
2090  s_pBot[1] = &SSE2_MultiplyBottom4;
2091  s_pBot[2] = &SSE2_MultiplyBottom8;
2092  s_pBot[4] = &SSE2_MultiplyBottom16;
2093  s_pBot[8] = &SSE2_MultiplyBottom32;
2094 
2095  s_pSqu[1] = &SSE2_Square4;
2096  s_pSqu[2] = &SSE2_Square8;
2097  s_pSqu[4] = &SSE2_Square16;
2098  s_pSqu[8] = &SSE2_Square32;
2099 
2100  s_pTop[2] = &SSE2_MultiplyTop8;
2101  s_pTop[4] = &SSE2_MultiplyTop16;
2102  s_pTop[8] = &SSE2_MultiplyTop32;
2103  }
2104  else
2105 #endif
2106  {
2107  s_pMul[1] = &Baseline_Multiply4;
2108  s_pMul[2] = &Baseline_Multiply8;
2109 
2110  s_pBot[1] = &Baseline_MultiplyBottom4;
2111  s_pBot[2] = &Baseline_MultiplyBottom8;
2112 
2113  s_pSqu[1] = &Baseline_Square4;
2114  s_pSqu[2] = &Baseline_Square8;
2115 
2116  s_pTop[2] = &Baseline_MultiplyTop8;
2117 
2118 #if !CRYPTOPP_INTEGER_SSE2
2119  s_pMul[4] = &Baseline_Multiply16;
2120  s_pBot[4] = &Baseline_MultiplyBottom16;
2121  s_pSqu[4] = &Baseline_Square16;
2122  s_pTop[4] = &Baseline_MultiplyTop16;
2123 #endif
2124  }
2125 }
2126 
2127 inline int Add(word *C, const word *A, const word *B, size_t N)
2128 {
2129 #if CRYPTOPP_INTEGER_SSE2
2130  return s_pAdd(N, C, A, B);
2131 #else
2132  return Baseline_Add(N, C, A, B);
2133 #endif
2134 }
2135 
2136 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2137 {
2138 #if CRYPTOPP_INTEGER_SSE2
2139  return s_pSub(N, C, A, B);
2140 #else
2141  return Baseline_Sub(N, C, A, B);
2142 #endif
2143 }
2144 
2145 // ********************************************************
2146 
2147 
2148 #define A0 A
2149 #define A1 (A+N2)
2150 #define B0 B
2151 #define B1 (B+N2)
2152 
2153 #define T0 T
2154 #define T1 (T+N2)
2155 #define T2 (T+N)
2156 #define T3 (T+N+N2)
2157 
2158 #define R0 R
2159 #define R1 (R+N2)
2160 #define R2 (R+N)
2161 #define R3 (R+N+N2)
2162 
2163 // R[2*N] - result = A*B
2164 // T[2*N] - temporary work space
2165 // A[N] --- multiplier
2166 // B[N] --- multiplicant
2167 
2168 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2169 {
2170  assert(N>=2 && N%2==0);
2171 
2172  if (N <= s_recursionLimit)
2173  s_pMul[N/4](R, A, B);
2174  else
2175  {
2176  const size_t N2 = N/2;
2177 
2178  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2179  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2180 
2181  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2182  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2183 
2184  RecursiveMultiply(R2, T2, A1, B1, N2);
2185  RecursiveMultiply(T0, T2, R0, R1, N2);
2186  RecursiveMultiply(R0, T2, A0, B0, N2);
2187 
2188  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2189 
2190  int c2 = Add(R2, R2, R1, N2);
2191  int c3 = c2;
2192  c2 += Add(R1, R2, R0, N2);
2193  c3 += Add(R2, R2, R3, N2);
2194 
2195  if (AN2 == BN2)
2196  c3 -= Subtract(R1, R1, T0, N);
2197  else
2198  c3 += Add(R1, R1, T0, N);
2199 
2200  c3 += Increment(R2, N2, c2);
2201  assert (c3 >= 0 && c3 <= 2);
2202  Increment(R3, N2, c3);
2203  }
2204 }
2205 
2206 // R[2*N] - result = A*A
2207 // T[2*N] - temporary work space
2208 // A[N] --- number to be squared
2209 
2210 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2211 {
2212  assert(N && N%2==0);
2213 
2214  if (N <= s_recursionLimit)
2215  s_pSqu[N/4](R, A);
2216  else
2217  {
2218  const size_t N2 = N/2;
2219 
2220  RecursiveSquare(R0, T2, A0, N2);
2221  RecursiveSquare(R2, T2, A1, N2);
2222  RecursiveMultiply(T0, T2, A0, A1, N2);
2223 
2224  int carry = Add(R1, R1, T0, N);
2225  carry += Add(R1, R1, T0, N);
2226  Increment(R3, N2, carry);
2227  }
2228 }
2229 
2230 // R[N] - bottom half of A*B
2231 // T[3*N/2] - temporary work space
2232 // A[N] - multiplier
2233 // B[N] - multiplicant
2234 
2235 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2236 {
2237  assert(N>=2 && N%2==0);
2238 
2239  if (N <= s_recursionLimit)
2240  s_pBot[N/4](R, A, B);
2241  else
2242  {
2243  const size_t N2 = N/2;
2244 
2245  RecursiveMultiply(R, T, A0, B0, N2);
2246  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2247  Add(R1, R1, T0, N2);
2248  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2249  Add(R1, R1, T0, N2);
2250  }
2251 }
2252 
2253 // R[N] --- upper half of A*B
2254 // T[2*N] - temporary work space
2255 // L[N] --- lower half of A*B
2256 // A[N] --- multiplier
2257 // B[N] --- multiplicant
2258 
2259 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2260 {
2261  assert(N>=2 && N%2==0);
2262 
2263  if (N <= s_recursionLimit)
2264  s_pTop[N/4](R, A, B, L[N-1]);
2265  else
2266  {
2267  const size_t N2 = N/2;
2268 
2269  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2270  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2271 
2272  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2273  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2274 
2275  RecursiveMultiply(T0, T2, R0, R1, N2);
2276  RecursiveMultiply(R0, T2, A1, B1, N2);
2277 
2278  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2279 
2280  int t, c3;
2281  int c2 = Subtract(T2, L+N2, L, N2);
2282 
2283  if (AN2 == BN2)
2284  {
2285  c2 -= Add(T2, T2, T0, N2);
2286  t = (Compare(T2, R0, N2) == -1);
2287  c3 = t - Subtract(T2, T2, T1, N2);
2288  }
2289  else
2290  {
2291  c2 += Subtract(T2, T2, T0, N2);
2292  t = (Compare(T2, R0, N2) == -1);
2293  c3 = t + Add(T2, T2, T1, N2);
2294  }
2295 
2296  c2 += t;
2297  if (c2 >= 0)
2298  c3 += Increment(T2, N2, c2);
2299  else
2300  c3 -= Decrement(T2, N2, -c2);
2301  c3 += Add(R0, T2, R1, N2);
2302 
2303  assert (c3 >= 0 && c3 <= 2);
2304  Increment(R1, N2, c3);
2305  }
2306 }
2307 
2308 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2309 {
2310  RecursiveMultiply(R, T, A, B, N);
2311 }
2312 
2313 inline void Square(word *R, word *T, const word *A, size_t N)
2314 {
2315  RecursiveSquare(R, T, A, N);
2316 }
2317 
2318 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2319 {
2320  RecursiveMultiplyBottom(R, T, A, B, N);
2321 }
2322 
2323 // R[NA+NB] - result = A*B
2324 // T[NA+NB] - temporary work space
2325 // A[NA] ---- multiplier
2326 // B[NB] ---- multiplicant
2327 
2328 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2329 {
2330  if (NA == NB)
2331  {
2332  if (A == B)
2333  Square(R, T, A, NA);
2334  else
2335  Multiply(R, T, A, B, NA);
2336 
2337  return;
2338  }
2339 
2340  if (NA > NB)
2341  {
2342  std::swap(A, B);
2343  std::swap(NA, NB);
2344  }
2345 
2346  assert(NB % NA == 0);
2347 
2348  if (NA==2 && !A[1])
2349  {
2350  switch (A[0])
2351  {
2352  case 0:
2353  SetWords(R, 0, NB+2);
2354  return;
2355  case 1:
2356  CopyWords(R, B, NB);
2357  R[NB] = R[NB+1] = 0;
2358  return;
2359  default:
2360  R[NB] = LinearMultiply(R, B, A[0], NB);
2361  R[NB+1] = 0;
2362  return;
2363  }
2364  }
2365 
2366  size_t i;
2367  if ((NB/NA)%2 == 0)
2368  {
2369  Multiply(R, T, A, B, NA);
2370  CopyWords(T+2*NA, R+NA, NA);
2371 
2372  for (i=2*NA; i<NB; i+=2*NA)
2373  Multiply(T+NA+i, T, A, B+i, NA);
2374  for (i=NA; i<NB; i+=2*NA)
2375  Multiply(R+i, T, A, B+i, NA);
2376  }
2377  else
2378  {
2379  for (i=0; i<NB; i+=2*NA)
2380  Multiply(R+i, T, A, B+i, NA);
2381  for (i=NA; i<NB; i+=2*NA)
2382  Multiply(T+NA+i, T, A, B+i, NA);
2383  }
2384 
2385  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2386  Increment(R+NB, NA);
2387 }
2388 
2389 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2390 // T[3*N/2] - temporary work space
2391 // A[N] ----- an odd number as input
2392 
2393 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2394 {
2395  if (N==2)
2396  {
2397  T[0] = AtomicInverseModPower2(A[0]);
2398  T[1] = 0;
2399  s_pBot[0](T+2, T, A);
2400  TwosComplement(T+2, 2);
2401  Increment(T+2, 2, 2);
2402  s_pBot[0](R, T, T+2);
2403  }
2404  else
2405  {
2406  const size_t N2 = N/2;
2407  RecursiveInverseModPower2(R0, T0, A0, N2);
2408  T0[0] = 1;
2409  SetWords(T0+1, 0, N2-1);
2410  MultiplyTop(R1, T1, T0, R0, A0, N2);
2411  MultiplyBottom(T0, T1, R0, A1, N2);
2412  Add(T0, R1, T0, N2);
2413  TwosComplement(T0, N2);
2414  MultiplyBottom(R1, T1, R0, T0, N2);
2415  }
2416 }
2417 
2418 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2419 // T[3*N] - temporary work space
2420 // X[2*N] - number to be reduced
2421 // M[N] --- modulus
2422 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2423 
2424 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2425 {
2426 #if 1
2427  MultiplyBottom(R, T, X, U, N);
2428  MultiplyTop(T, T+N, X, R, M, N);
2429  word borrow = Subtract(T, X+N, T, N);
2430  // defend against timing attack by doing this Add even when not needed
2431  word carry = Add(T+N, T, M, N);
2432  assert(carry | !borrow);
2433  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2434  CopyWords(R, T + ((0-borrow) & N), N);
2435 #elif 0
2436  const word u = 0-U[0];
2437  Declare2Words(p)
2438  for (size_t i=0; i<N; i++)
2439  {
2440  const word t = u * X[i];
2441  word c = 0;
2442  for (size_t j=0; j<N; j+=2)
2443  {
2444  MultiplyWords(p, t, M[j]);
2445  Acc2WordsBy1(p, X[i+j]);
2446  Acc2WordsBy1(p, c);
2447  X[i+j] = LowWord(p);
2448  c = HighWord(p);
2449  MultiplyWords(p, t, M[j+1]);
2450  Acc2WordsBy1(p, X[i+j+1]);
2451  Acc2WordsBy1(p, c);
2452  X[i+j+1] = LowWord(p);
2453  c = HighWord(p);
2454  }
2455 
2456  if (Increment(X+N+i, N-i, c))
2457  while (!Subtract(X+N, X+N, M, N)) {}
2458  }
2459 
2460  memcpy(R, X+N, N*WORD_SIZE);
2461 #else
2462  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2463  for (size_t i=0; i<N; i++)
2464  {
2465  __m64 t = _mm_cvtsi32_si64(X[i]);
2466  t = _mm_mul_su32(t, u);
2467  __m64 c = _mm_setzero_si64();
2468  for (size_t j=0; j<N; j+=2)
2469  {
2470  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2471  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2472  c = _mm_add_si64(c, p);
2473  X[i+j] = _mm_cvtsi64_si32(c);
2474  c = _mm_srli_si64(c, 32);
2475  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2476  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2477  c = _mm_add_si64(c, p);
2478  X[i+j+1] = _mm_cvtsi64_si32(c);
2479  c = _mm_srli_si64(c, 32);
2480  }
2481 
2482  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2483  while (!Subtract(X+N, X+N, M, N)) {}
2484  }
2485 
2486  memcpy(R, X+N, N*WORD_SIZE);
2487  _mm_empty();
2488 #endif
2489 }
2490 
2491 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2492 // T[2*N] - temporary work space
2493 // X[2*N] - number to be reduced
2494 // M[N] --- modulus
2495 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2496 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2497 
2498 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2499 {
2500  assert(N%2==0 && N>=4);
2501 
2502 #define M0 M
2503 #define M1 (M+N2)
2504 #define V0 V
2505 #define V1 (V+N2)
2506 
2507 #define X0 X
2508 #define X1 (X+N2)
2509 #define X2 (X+N)
2510 #define X3 (X+N+N2)
2511 
2512  const size_t N2 = N/2;
2513  Multiply(T0, T2, V0, X3, N2);
2514  int c2 = Add(T0, T0, X0, N);
2515  MultiplyBottom(T3, T2, T0, U, N2);
2516  MultiplyTop(T2, R, T0, T3, M0, N2);
2517  c2 -= Subtract(T2, T1, T2, N2);
2518  Multiply(T0, R, T3, M1, N2);
2519  c2 -= Subtract(T0, T2, T0, N2);
2520  int c3 = -(int)Subtract(T1, X2, T1, N2);
2521  Multiply(R0, T2, V1, X3, N2);
2522  c3 += Add(R, R, T, N);
2523 
2524  if (c2>0)
2525  c3 += Increment(R1, N2);
2526  else if (c2<0)
2527  c3 -= Decrement(R1, N2, -c2);
2528 
2529  assert(c3>=-1 && c3<=1);
2530  if (c3>0)
2531  Subtract(R, R, M, N);
2532  else if (c3<0)
2533  Add(R, R, M, N);
2534 
2535 #undef M0
2536 #undef M1
2537 #undef V0
2538 #undef V1
2539 
2540 #undef X0
2541 #undef X1
2542 #undef X2
2543 #undef X3
2544 }
2545 
2546 #undef A0
2547 #undef A1
2548 #undef B0
2549 #undef B1
2550 
2551 #undef T0
2552 #undef T1
2553 #undef T2
2554 #undef T3
2555 
2556 #undef R0
2557 #undef R1
2558 #undef R2
2559 #undef R3
2560 
2561 /*
2562 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2563 static word SubatomicDivide(word *A, word B0, word B1)
2564 {
2565  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2566  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2567 
2568  // estimate the quotient: do a 2 word by 1 word divide
2569  word Q;
2570  if (B1+1 == 0)
2571  Q = A[2];
2572  else
2573  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2574 
2575  // now subtract Q*B from A
2576  DWord p = DWord::Multiply(B0, Q);
2577  DWord u = (DWord) A[0] - p.GetLowHalf();
2578  A[0] = u.GetLowHalf();
2579  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2580  A[1] = u.GetLowHalf();
2581  A[2] += u.GetHighHalf();
2582 
2583  // Q <= actual quotient, so fix it
2584  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2585  {
2586  u = (DWord) A[0] - B0;
2587  A[0] = u.GetLowHalf();
2588  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2589  A[1] = u.GetLowHalf();
2590  A[2] += u.GetHighHalf();
2591  Q++;
2592  assert(Q); // shouldn't overflow
2593  }
2594 
2595  return Q;
2596 }
2597 
2598 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2599 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2600 {
2601  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2602  {
2603  Q[0] = A[2];
2604  Q[1] = A[3];
2605  }
2606  else
2607  {
2608  word T[4];
2609  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2610  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2611  Q[0] = SubatomicDivide(T, B[0], B[1]);
2612 
2613 #ifndef NDEBUG
2614  // multiply quotient and divisor and add remainder, make sure it equals dividend
2615  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2616  word P[4];
2617  LowLevel::Multiply2(P, Q, B);
2618  Add(P, P, T, 4);
2619  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2620 #endif
2621  }
2622 }
2623 */
2624 
2625 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2626 {
2627  word T[4];
2628  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2629  Q[0] = q.GetLowHalf();
2630  Q[1] = q.GetHighHalf();
2631 
2632 #ifndef NDEBUG
2633  if (B[0] || B[1])
2634  {
2635  // multiply quotient and divisor and add remainder, make sure it equals dividend
2636  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2637  word P[4];
2638  s_pMul[0](P, Q, B);
2639  Add(P, P, T, 4);
2640  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2641  }
2642 #endif
2643 }
2644 
2645 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2646 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2647 {
2648  assert(N && N%2==0);
2649 
2650  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2651 
2652  word borrow = Subtract(R, R, T, N+2);
2653  assert(!borrow && !R[N+1]);
2654  CRYPTOPP_UNUSED(borrow);
2655 
2656  while (R[N] || Compare(R, B, N) >= 0)
2657  {
2658  R[N] -= Subtract(R, R, B, N);
2659  Q[1] += (++Q[0]==0);
2660  assert(Q[0] || Q[1]); // no overflow
2661  }
2662 }
2663 
2664 // R[NB] -------- remainder = A%B
2665 // Q[NA-NB+2] --- quotient = A/B
2666 // T[NA+3*(NB+2)] - temp work space
2667 // A[NA] -------- dividend
2668 // B[NB] -------- divisor
2669 
2670 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2671 {
2672  assert(NA && NB && NA%2==0 && NB%2==0);
2673  assert(B[NB-1] || B[NB-2]);
2674  assert(NB <= NA);
2675 
2676  // set up temporary work space
2677  word *const TA=T;
2678  word *const TB=T+NA+2;
2679  word *const TP=T+NA+2+NB;
2680 
2681  // copy B into TB and normalize it so that TB has highest bit set to 1
2682  unsigned shiftWords = (B[NB-1]==0);
2683  TB[0] = TB[NB-1] = 0;
2684  CopyWords(TB+shiftWords, B, NB-shiftWords);
2685  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2686  assert(shiftBits < WORD_BITS);
2687  ShiftWordsLeftByBits(TB, NB, shiftBits);
2688 
2689  // copy A into TA and normalize it
2690  TA[0] = TA[NA] = TA[NA+1] = 0;
2691  CopyWords(TA+shiftWords, A, NA);
2692  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2693 
2694  if (TA[NA+1]==0 && TA[NA] <= 1)
2695  {
2696  Q[NA-NB+1] = Q[NA-NB] = 0;
2697  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2698  {
2699  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2700  ++Q[NA-NB];
2701  }
2702  }
2703  else
2704  {
2705  NA+=2;
2706  assert(Compare(TA+NA-NB, TB, NB) < 0);
2707  }
2708 
2709  word BT[2];
2710  BT[0] = TB[NB-2] + 1;
2711  BT[1] = TB[NB-1] + (BT[0]==0);
2712 
2713  // start reducing TA mod TB, 2 words at a time
2714  for (size_t i=NA-2; i>=NB; i-=2)
2715  {
2716  AtomicDivide(Q+i-NB, TA+i-2, BT);
2717  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2718  }
2719 
2720  // copy TA into R, and denormalize it
2721  CopyWords(R, TA+shiftWords, NB);
2722  ShiftWordsRightByBits(R, NB, shiftBits);
2723 }
2724 
2725 static inline size_t EvenWordCount(const word *X, size_t N)
2726 {
2727  while (N && X[N-2]==0 && X[N-1]==0)
2728  N-=2;
2729  return N;
2730 }
2731 
2732 // return k
2733 // R[N] --- result = A^(-1) * 2^k mod M
2734 // T[4*N] - temporary work space
2735 // A[NA] -- number to take inverse of
2736 // M[N] --- modulus
2737 
2738 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2739 {
2740  assert(NA<=N && N && N%2==0);
2741 
2742  word *b = T;
2743  word *c = T+N;
2744  word *f = T+2*N;
2745  word *g = T+3*N;
2746  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2747  unsigned int k=0;
2748  bool s=false;
2749 
2750  SetWords(T, 0, 3*N);
2751  b[0]=1;
2752  CopyWords(f, A, NA);
2753  CopyWords(g, M, N);
2754 
2755  while (1)
2756  {
2757  word t=f[0];
2758  while (!t)
2759  {
2760  if (EvenWordCount(f, fgLen)==0)
2761  {
2762  SetWords(R, 0, N);
2763  return 0;
2764  }
2765 
2766  ShiftWordsRightByWords(f, fgLen, 1);
2767  bcLen += 2 * (c[bcLen-1] != 0);
2768  assert(bcLen <= N);
2769  ShiftWordsLeftByWords(c, bcLen, 1);
2770  k+=WORD_BITS;
2771  t=f[0];
2772  }
2773 
2774  // t must be non-0; otherwise undefined.
2775  unsigned int i = TrailingZeros(t);
2776  t >>= i;
2777  k += i;
2778 
2779  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2780  {
2781  if (s)
2782  Subtract(R, M, b, N);
2783  else
2784  CopyWords(R, b, N);
2785  return k;
2786  }
2787 
2788  ShiftWordsRightByBits(f, fgLen, i);
2789  t = ShiftWordsLeftByBits(c, bcLen, i);
2790  c[bcLen] += t;
2791  bcLen += 2 * (t!=0);
2792  assert(bcLen <= N);
2793 
2794  bool swap = Compare(f, g, fgLen)==-1;
2795  ConditionalSwapPointers(swap, f, g);
2796  ConditionalSwapPointers(swap, b, c);
2797  s ^= swap;
2798 
2799  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2800 
2801  Subtract(f, f, g, fgLen);
2802  t = Add(b, b, c, bcLen);
2803  b[bcLen] += t;
2804  bcLen += 2*t;
2805  assert(bcLen <= N);
2806  }
2807 }
2808 
2809 // R[N] - result = A/(2^k) mod M
2810 // A[N] - input
2811 // M[N] - modulus
2812 
2813 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2814 {
2815  CopyWords(R, A, N);
2816 
2817  while (k--)
2818  {
2819  if (R[0]%2==0)
2820  ShiftWordsRightByBits(R, N, 1);
2821  else
2822  {
2823  word carry = Add(R, R, M, N);
2824  ShiftWordsRightByBits(R, N, 1);
2825  R[N-1] += carry<<(WORD_BITS-1);
2826  }
2827  }
2828 }
2829 
2830 // R[N] - result = A*(2^k) mod M
2831 // A[N] - input
2832 // M[N] - modulus
2833 
2834 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2835 {
2836  CopyWords(R, A, N);
2837 
2838  while (k--)
2839  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2840  Subtract(R, R, M, N);
2841 }
2842 
2843 // ******************************************************************
2844 
2845 InitializeInteger::InitializeInteger()
2846 {
2847  if (!g_pAssignIntToInteger)
2848  {
2849  SetFunctionPointers();
2850  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2851  }
2852 }
2853 
2854 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2855 
2856 static inline size_t RoundupSize(size_t n)
2857 {
2858  if (n<=8)
2859  return RoundupSizeTable[n];
2860  else if (n<=16)
2861  return 16;
2862  else if (n<=32)
2863  return 32;
2864  else if (n<=64)
2865  return 64;
2866  else return size_t(1) << BitPrecision(n-1);
2867 }
2868 
2870  : reg(2), sign(POSITIVE)
2871 {
2872  reg[0] = reg[1] = 0;
2873 }
2874 
2876  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2877 {
2878  CopyWords(reg, t.reg, reg.size());
2879 }
2880 
2881 Integer::Integer(Sign s, lword value)
2882  : reg(2), sign(s)
2883 {
2884  reg[0] = word(value);
2885  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2886 }
2887 
2888 Integer::Integer(signed long value)
2889  : reg(2)
2890 {
2891  if (value >= 0)
2892  sign = POSITIVE;
2893  else
2894  {
2895  sign = NEGATIVE;
2896  value = -value;
2897  }
2898  reg[0] = word(value);
2899  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2900 }
2901 
2902 Integer::Integer(Sign s, word high, word low)
2903  : reg(2), sign(s)
2904 {
2905  reg[0] = low;
2906  reg[1] = high;
2907 }
2908 
2910 {
2911  if (ByteCount() > sizeof(long))
2912  return false;
2913 
2914  unsigned long value = (unsigned long)reg[0];
2915  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2916 
2917  if (sign==POSITIVE)
2918  return (signed long)value >= 0;
2919  else
2920  return -(signed long)value < 0;
2921 }
2922 
2923 signed long Integer::ConvertToLong() const
2924 {
2925  assert(IsConvertableToLong());
2926 
2927  unsigned long value = (unsigned long)reg[0];
2928  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2929  return sign==POSITIVE ? value : -(signed long)value;
2930 }
2931 
2932 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2933 {
2934  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2935 
2936  if(o == LITTLE_ENDIAN_ORDER)
2937  {
2938  SecByteBlock block(byteCount);
2939  encodedInteger.Get(block, block.size());
2940  std::reverse(block.begin(), block.begin()+block.size());
2941 
2942  Decode(block.begin(), block.size(), s);
2943  return;
2944  }
2945 
2946  Decode(encodedInteger, byteCount, s);
2947 }
2948 
2949 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2950 {
2951  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2952 
2953  if(o == LITTLE_ENDIAN_ORDER)
2954  {
2955  SecByteBlock block(byteCount);
2956 #if (CRYPTOPP_MSC_VERSION >= 1400)
2957  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2958  stdext::make_checked_array_iterator(block.begin(), block.size()));
2959 #else
2960  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2961 #endif
2962  Decode(block.begin(), block.size(), s);
2963  return;
2964  }
2965 
2966  Decode(encodedInteger, byteCount, s);
2967 }
2968 
2970 {
2971  BERDecode(bt);
2972 }
2973 
2975 {
2976  Randomize(rng, bitcount);
2977 }
2978 
2979 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2980 {
2981  if (!Randomize(rng, min, max, rnType, equiv, mod))
2983 }
2984 
2986 {
2987  Integer r((word)0, BitsToWords(e+1));
2988  r.SetBit(e);
2989  return r;
2990 }
2991 
2992 template <long i>
2994 {
2995  Integer * operator()() const
2996  {
2997  return new Integer(i);
2998  }
2999 };
3000 
3002 {
3003  return Singleton<Integer>().Ref();
3004 }
3005 
3007 {
3008  return Singleton<Integer, NewInteger<1> >().Ref();
3009 }
3010 
3012 {
3013  return Singleton<Integer, NewInteger<2> >().Ref();
3014 }
3015 
3016 bool Integer::operator!() const
3017 {
3018  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3019 }
3020 
3021 Integer& Integer::operator=(const Integer& t)
3022 {
3023  if (this != &t)
3024  {
3025  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3026  reg.New(RoundupSize(t.WordCount()));
3027  CopyWords(reg, t.reg, reg.size());
3028  sign = t.sign;
3029  }
3030  return *this;
3031 }
3032 
3033 bool Integer::GetBit(size_t n) const
3034 {
3035  if (n/WORD_BITS >= reg.size())
3036  return 0;
3037  else
3038  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3039 }
3040 
3041 void Integer::SetBit(size_t n, bool value)
3042 {
3043  if (value)
3044  {
3045  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3046  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3047  }
3048  else
3049  {
3050  if (n/WORD_BITS < reg.size())
3051  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3052  }
3053 }
3054 
3055 byte Integer::GetByte(size_t n) const
3056 {
3057  if (n/WORD_SIZE >= reg.size())
3058  return 0;
3059  else
3060  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3061 }
3062 
3063 void Integer::SetByte(size_t n, byte value)
3064 {
3065  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3066  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3067  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3068 }
3069 
3070 lword Integer::GetBits(size_t i, size_t n) const
3071 {
3072  lword v = 0;
3073  assert(n <= sizeof(v)*8);
3074  for (unsigned int j=0; j<n; j++)
3075  v |= lword(GetBit(i+j)) << j;
3076  return v;
3077 }
3078 
3079 Integer Integer::operator-() const
3080 {
3081  Integer result(*this);
3082  result.Negate();
3083  return result;
3084 }
3085 
3086 Integer Integer::AbsoluteValue() const
3087 {
3088  Integer result(*this);
3089  result.sign = POSITIVE;
3090  return result;
3091 }
3092 
3094 {
3095  reg.swap(a.reg);
3096  std::swap(sign, a.sign);
3097 }
3098 
3099 Integer::Integer(word value, size_t length)
3100  : reg(RoundupSize(length)), sign(POSITIVE)
3101 {
3102  reg[0] = value;
3103  SetWords(reg+1, 0, reg.size()-1);
3104 }
3105 
3106 template <class T>
3107 static Integer StringToInteger(const T *str, ByteOrder order)
3108 {
3109  assert( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3110 
3111  int radix, sign = 1;
3112  // GCC workaround
3113  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3114  unsigned int length;
3115  for (length = 0; str[length] != 0; length++) {}
3116 
3117  Integer v;
3118 
3119  if (length == 0)
3120  return Integer::Zero();
3121 
3122  switch (str[length-1])
3123  {
3124  case 'h':
3125  case 'H':
3126  radix=16;
3127  break;
3128  case 'o':
3129  case 'O':
3130  radix=8;
3131  break;
3132  case 'b':
3133  case 'B':
3134  radix=2;
3135  break;
3136  default:
3137  radix=10;
3138  }
3139 
3140  // 'str' is of length 1 or more
3141  if (str[0] == '-')
3142  {
3143  sign = -1;
3144  str += 1, length -= 1;
3145  }
3146 
3147  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3148  {
3149  radix = 16;
3150  str += 2, length -= 2;
3151  }
3152 
3153  if(order == BIG_ENDIAN_ORDER)
3154  {
3155  for (unsigned int i=0; i<length; i++)
3156  {
3157  int digit, ch = static_cast<int>(str[i]);
3158 
3159  if (ch >= '0' && ch <= '9')
3160  digit = ch - '0';
3161  else if (ch >= 'A' && ch <= 'F')
3162  digit = ch - 'A' + 10;
3163  else if (ch >= 'a' && ch <= 'f')
3164  digit = ch - 'a' + 10;
3165  else
3166  digit = radix;
3167 
3168  if (digit < radix)
3169  {
3170  v *= radix;
3171  v += digit;
3172  }
3173  }
3174  }
3175  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3176  {
3177  // Nibble high, low and count
3178  unsigned int nh = 0, nl = 0, nc = 0;
3179  Integer position(Integer::One());
3180 
3181  for (unsigned int i=0; i<length; i++)
3182  {
3183  int digit, ch = static_cast<int>(str[i]);
3184 
3185  if (ch >= '0' && ch <= '9')
3186  digit = ch - '0';
3187  else if (ch >= 'A' && ch <= 'F')
3188  digit = ch - 'A' + 10;
3189  else if (ch >= 'a' && ch <= 'f')
3190  digit = ch - 'a' + 10;
3191  else
3192  digit = radix;
3193 
3194  if (digit < radix)
3195  {
3196  if(nc++ == 0)
3197  nh = digit;
3198  else
3199  nl = digit;
3200 
3201  if(nc == 2)
3202  {
3203  v += position * (nh << 4 | nl);
3204  nc = 0, position <<= 8;
3205  }
3206  }
3207  }
3208 
3209  if(nc == 1)
3210  v += nh * position;
3211  }
3212  else // LITTLE_ENDIAN_ORDER && radix != 16
3213  {
3214  for (int i=static_cast<int>(length)-1; i>=0; i--)
3215  {
3216  int digit, ch = static_cast<int>(str[i]);
3217 
3218  if (ch >= '0' && ch <= '9')
3219  digit = ch - '0';
3220  else if (ch >= 'A' && ch <= 'F')
3221  digit = ch - 'A' + 10;
3222  else if (ch >= 'a' && ch <= 'f')
3223  digit = ch - 'a' + 10;
3224  else
3225  digit = radix;
3226 
3227  if (digit < radix)
3228  {
3229  v *= radix;
3230  v += digit;
3231  }
3232  }
3233  }
3234 
3235  if (sign == -1)
3236  v.Negate();
3237 
3238  return v;
3239 }
3240 
3241 Integer::Integer(const char *str, ByteOrder order)
3242  : reg(2), sign(POSITIVE)
3243 {
3244  *this = StringToInteger(str,order);
3245 }
3246 
3247 Integer::Integer(const wchar_t *str, ByteOrder order)
3248  : reg(2), sign(POSITIVE)
3249 {
3250  *this = StringToInteger(str,order);
3251 }
3252 
3253 unsigned int Integer::WordCount() const
3254 {
3255  return (unsigned int)CountWords(reg, reg.size());
3256 }
3257 
3258 unsigned int Integer::ByteCount() const
3259 {
3260  unsigned wordCount = WordCount();
3261  if (wordCount)
3262  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3263  else
3264  return 0;
3265 }
3266 
3267 unsigned int Integer::BitCount() const
3268 {
3269  unsigned wordCount = WordCount();
3270  if (wordCount)
3271  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3272  else
3273  return 0;
3274 }
3275 
3276 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3277 {
3278  StringStore store(input, inputLen);
3279  Decode(store, inputLen, s);
3280 }
3281 
3283 {
3284  assert(bt.MaxRetrievable() >= inputLen);
3285 
3286  byte b;
3287  bt.Peek(b);
3288  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3289 
3290  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3291  {
3292  bt.Skip(1);
3293  inputLen--;
3294  bt.Peek(b);
3295  }
3296 
3297  // The call to CleanNew is optimized away above -O0/-Og.
3298  const size_t size = RoundupSize(BytesToWords(inputLen));
3299  reg.CleanNew(size);
3300 
3301  assert(reg.SizeInBytes() >= inputLen);
3302  for (size_t i=inputLen; i > 0; i--)
3303  {
3304  bt.Get(b);
3305  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3306  }
3307 
3308  if (sign == NEGATIVE)
3309  {
3310  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3311  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3312  TwosComplement(reg, reg.size());
3313  }
3314 }
3315 
3316 size_t Integer::MinEncodedSize(Signedness signedness) const
3317 {
3318  unsigned int outputLen = STDMAX(1U, ByteCount());
3319  if (signedness == UNSIGNED)
3320  return outputLen;
3321  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3322  outputLen++;
3323  if (IsNegative() && *this < -Power2(outputLen*8-1))
3324  outputLen++;
3325  return outputLen;
3326 }
3327 
3328 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3329 {
3330  assert(output && outputLen);
3331  ArraySink sink(output, outputLen);
3332  Encode(sink, outputLen, signedness);
3333 }
3334 
3335 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3336 {
3337  if (signedness == UNSIGNED || NotNegative())
3338  {
3339  for (size_t i=outputLen; i > 0; i--)
3340  bt.Put(GetByte(i-1));
3341  }
3342  else
3343  {
3344  // take two's complement of *this
3345  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3346  temp.Encode(bt, outputLen, UNSIGNED);
3347  }
3348 }
3349 
3351 {
3352  DERGeneralEncoder enc(bt, INTEGER);
3354  enc.MessageEnd();
3355 }
3356 
3357 void Integer::BERDecode(const byte *input, size_t len)
3358 {
3359  StringStore store(input, len);
3360  BERDecode(store);
3361 }
3362 
3364 {
3365  BERGeneralDecoder dec(bt, INTEGER);
3366  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3367  BERDecodeError();
3368  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3369  dec.MessageEnd();
3370 }
3371 
3373 {
3374  DERGeneralEncoder enc(bt, OCTET_STRING);
3375  Encode(enc, length);
3376  enc.MessageEnd();
3377 }
3378 
3380 {
3381  BERGeneralDecoder dec(bt, OCTET_STRING);
3382  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3383  BERDecodeError();
3384  Decode(dec, length);
3385  dec.MessageEnd();
3386 }
3387 
3388 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3389 {
3390  ArraySink sink(output, len);
3391  return OpenPGPEncode(sink);
3392 }
3393 
3395 {
3396  word16 bitCount = word16(BitCount());
3397  bt.PutWord16(bitCount);
3398  size_t byteCount = BitsToBytes(bitCount);
3399  Encode(bt, byteCount);
3400  return 2 + byteCount;
3401 }
3402 
3403 void Integer::OpenPGPDecode(const byte *input, size_t len)
3404 {
3405  StringStore store(input, len);
3406  OpenPGPDecode(store);
3407 }
3408 
3410 {
3411  word16 bitCount;
3412  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3413  throw OpenPGPDecodeErr();
3414  Decode(bt, BitsToBytes(bitCount));
3415 }
3416 
3418 {
3419  const size_t nbytes = nbits/8 + 1;
3420  SecByteBlock buf(nbytes);
3421  rng.GenerateBlock(buf, nbytes);
3422  if (nbytes)
3423  buf[0] = (byte)Crop(buf[0], nbits % 8);
3424  Decode(buf, nbytes, UNSIGNED);
3425 }
3426 
3427 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3428 {
3429  if (min > max)
3430  throw InvalidArgument("Integer: Min must be no greater than Max");
3431 
3432  Integer range = max - min;
3433  const unsigned int nbits = range.BitCount();
3434 
3435  do
3436  {
3437  Randomize(rng, nbits);
3438  }
3439  while (*this > range);
3440 
3441  *this += min;
3442 }
3443 
3444 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3445 {
3446  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3447 }
3448 
3450 {
3451 public:
3452  KDF2_RNG(const byte *seed, size_t seedSize)
3453  : m_counter(0), m_counterAndSeed(seedSize + 4)
3454  {
3455  memcpy(m_counterAndSeed + 4, seed, seedSize);
3456  }
3457 
3458  void GenerateBlock(byte *output, size_t size)
3459  {
3460  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3461  ++m_counter;
3462  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3463  }
3464 
3465 private:
3466  word32 m_counter;
3467  SecByteBlock m_counterAndSeed;
3468 };
3469 
3470 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3471 {
3472  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3473  Integer max;
3474  if (!params.GetValue("Max", max))
3475  {
3476  int bitLength;
3477  if (params.GetIntValue("BitLength", bitLength))
3478  max = Integer::Power2(bitLength);
3479  else
3480  throw InvalidArgument("Integer: missing Max argument");
3481  }
3482  if (min > max)
3483  throw InvalidArgument("Integer: Min must be no greater than Max");
3484 
3485  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3486  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3487 
3488  if (equiv.IsNegative() || equiv >= mod)
3489  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3490 
3491  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3492 
3493  member_ptr<KDF2_RNG> kdf2Rng;
3495  if (params.GetValue(Name::Seed(), seed))
3496  {
3497  ByteQueue bq;
3498  DERSequenceEncoder seq(bq);
3499  min.DEREncode(seq);
3500  max.DEREncode(seq);
3501  equiv.DEREncode(seq);
3502  mod.DEREncode(seq);
3503  DEREncodeUnsigned(seq, rnType);
3504  DEREncodeOctetString(seq, seed.begin(), seed.size());
3505  seq.MessageEnd();
3506 
3507  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3508  bq.Get(finalSeed, finalSeed.size());
3509  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3510  }
3511  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3512 
3513  switch (rnType)
3514  {
3515  case ANY:
3516  if (mod == One())
3517  Randomize(rng, min, max);
3518  else
3519  {
3520  Integer min1 = min + (equiv-min)%mod;
3521  if (max < min1)
3522  return false;
3523  Randomize(rng, Zero(), (max - min1) / mod);
3524  *this *= mod;
3525  *this += min1;
3526  }
3527  return true;
3528 
3529  case PRIME:
3530  {
3531  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3532 
3533  int i;
3534  i = 0;
3535  while (1)
3536  {
3537  if (++i==16)
3538  {
3539  // check if there are any suitable primes in [min, max]
3540  Integer first = min;
3541  if (FirstPrime(first, max, equiv, mod, pSelector))
3542  {
3543  // if there is only one suitable prime, we're done
3544  *this = first;
3545  if (!FirstPrime(first, max, equiv, mod, pSelector))
3546  return true;
3547  }
3548  else
3549  return false;
3550  }
3551 
3552  Randomize(rng, min, max);
3553  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3554  return true;
3555  }
3556  }
3557 
3558  default:
3559  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3560  }
3561 }
3562 
3563 std::istream& operator>>(std::istream& in, Integer &a)
3564 {
3565  char c;
3566  unsigned int length = 0;
3567  SecBlock<char> str(length + 16);
3568 
3569  std::ws(in);
3570 
3571  do
3572  {
3573  in.read(&c, 1);
3574  str[length++] = c;
3575  if (length >= str.size())
3576  str.Grow(length + 16);
3577  }
3578  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3579 
3580  if (in.gcount())
3581  in.putback(c);
3582  str[length-1] = '\0';
3583  a = Integer(str);
3584 
3585  return in;
3586 }
3587 
3588 std::ostream& operator<<(std::ostream& out, const Integer &a)
3589 {
3590  // Get relevant conversion specifications from ostream.
3591  const long f = out.flags() & std::ios::basefield; // Get base digits.
3592  int base, block;
3593  char suffix;
3594  switch(f)
3595  {
3596  case std::ios::oct :
3597  base = 8;
3598  block = 8;
3599  suffix = 'o';
3600  break;
3601  case std::ios::hex :
3602  base = 16;
3603  block = 4;
3604  suffix = 'h';
3605  break;
3606  default :
3607  base = 10;
3608  block = 3;
3609  suffix = '.';
3610  }
3611 
3612  Integer temp1=a, temp2;
3613 
3614  if (a.IsNegative())
3615  {
3616  out << '-';
3617  temp1.Negate();
3618  }
3619 
3620  if (!a)
3621  out << '0';
3622 
3623  static const char upper[]="0123456789ABCDEF";
3624  static const char lower[]="0123456789abcdef";
3625 
3626  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3627  unsigned int i=0;
3628  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3629 
3630  while (!!temp1)
3631  {
3632  word digit;
3633  Integer::Divide(digit, temp2, temp1, base);
3634  s[i++]=vec[digit];
3635  temp1.swap(temp2);
3636  }
3637 
3638  while (i--)
3639  {
3640  out << s[i];
3641 // if (i && !(i%block))
3642 // out << ",";
3643  }
3644 
3645 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3646  if(out.flags() & std::ios_base::showbase)
3647  out << suffix;
3648 
3649  return out;
3650 #else
3651  return out << suffix;
3652 #endif
3653 }
3654 
3655 Integer& Integer::operator++()
3656 {
3657  if (NotNegative())
3658  {
3659  if (Increment(reg, reg.size()))
3660  {
3661  reg.CleanGrow(2*reg.size());
3662  reg[reg.size()/2]=1;
3663  }
3664  }
3665  else
3666  {
3667  word borrow = Decrement(reg, reg.size());
3668  assert(!borrow);
3669  CRYPTOPP_UNUSED(borrow);
3670 
3671  if (WordCount()==0)
3672  *this = Zero();
3673  }
3674  return *this;
3675 }
3676 
3677 Integer& Integer::operator--()
3678 {
3679  if (IsNegative())
3680  {
3681  if (Increment(reg, reg.size()))
3682  {
3683  reg.CleanGrow(2*reg.size());
3684  reg[reg.size()/2]=1;
3685  }
3686  }
3687  else
3688  {
3689  if (Decrement(reg, reg.size()))
3690  *this = -One();
3691  }
3692  return *this;
3693 }
3694 
3695 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3696 {
3697  int carry;
3698  if (a.reg.size() == b.reg.size())
3699  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3700  else if (a.reg.size() > b.reg.size())
3701  {
3702  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3703  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3704  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3705  }
3706  else
3707  {
3708  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3709  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3710  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3711  }
3712 
3713  if (carry)
3714  {
3715  sum.reg.CleanGrow(2*sum.reg.size());
3716  sum.reg[sum.reg.size()/2] = 1;
3717  }
3718  sum.sign = Integer::POSITIVE;
3719 }
3720 
3721 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3722 {
3723  unsigned aSize = a.WordCount();
3724  aSize += aSize%2;
3725  unsigned bSize = b.WordCount();
3726  bSize += bSize%2;
3727 
3728  if (aSize == bSize)
3729  {
3730  if (Compare(a.reg, b.reg, aSize) >= 0)
3731  {
3732  Subtract(diff.reg, a.reg, b.reg, aSize);
3733  diff.sign = Integer::POSITIVE;
3734  }
3735  else
3736  {
3737  Subtract(diff.reg, b.reg, a.reg, aSize);
3738  diff.sign = Integer::NEGATIVE;
3739  }
3740  }
3741  else if (aSize > bSize)
3742  {
3743  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3744  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3745  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3746  assert(!borrow);
3747  diff.sign = Integer::POSITIVE;
3748  }
3749  else
3750  {
3751  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3752  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3753  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3754  assert(!borrow);
3755  diff.sign = Integer::NEGATIVE;
3756  }
3757 }
3758 
3759 // MSVC .NET 2003 workaround
3760 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3761 {
3762  return a < b ? b : a;
3763 }
3764 
3765 Integer Integer::Plus(const Integer& b) const
3766 {
3767  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3768  if (NotNegative())
3769  {
3770  if (b.NotNegative())
3771  PositiveAdd(sum, *this, b);
3772  else
3773  PositiveSubtract(sum, *this, b);
3774  }
3775  else
3776  {
3777  if (b.NotNegative())
3778  PositiveSubtract(sum, b, *this);
3779  else
3780  {
3781  PositiveAdd(sum, *this, b);
3782  sum.sign = Integer::NEGATIVE;
3783  }
3784  }
3785  return sum;
3786 }
3787 
3788 Integer& Integer::operator+=(const Integer& t)
3789 {
3790  reg.CleanGrow(t.reg.size());
3791  if (NotNegative())
3792  {
3793  if (t.NotNegative())
3794  PositiveAdd(*this, *this, t);
3795  else
3796  PositiveSubtract(*this, *this, t);
3797  }
3798  else
3799  {
3800  if (t.NotNegative())
3801  PositiveSubtract(*this, t, *this);
3802  else
3803  {
3804  PositiveAdd(*this, *this, t);
3805  sign = Integer::NEGATIVE;
3806  }
3807  }
3808  return *this;
3809 }
3810 
3811 Integer Integer::Minus(const Integer& b) const
3812 {
3813  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3814  if (NotNegative())
3815  {
3816  if (b.NotNegative())
3817  PositiveSubtract(diff, *this, b);
3818  else
3819  PositiveAdd(diff, *this, b);
3820  }
3821  else
3822  {
3823  if (b.NotNegative())
3824  {
3825  PositiveAdd(diff, *this, b);
3826  diff.sign = Integer::NEGATIVE;
3827  }
3828  else
3829  PositiveSubtract(diff, b, *this);
3830  }
3831  return diff;
3832 }
3833 
3834 Integer& Integer::operator-=(const Integer& t)
3835 {
3836  reg.CleanGrow(t.reg.size());
3837  if (NotNegative())
3838  {
3839  if (t.NotNegative())
3840  PositiveSubtract(*this, *this, t);
3841  else
3842  PositiveAdd(*this, *this, t);
3843  }
3844  else
3845  {
3846  if (t.NotNegative())
3847  {
3848  PositiveAdd(*this, *this, t);
3849  sign = Integer::NEGATIVE;
3850  }
3851  else
3852  PositiveSubtract(*this, t, *this);
3853  }
3854  return *this;
3855 }
3856 
3857 Integer& Integer::operator<<=(size_t n)
3858 {
3859  const size_t wordCount = WordCount();
3860  const size_t shiftWords = n / WORD_BITS;
3861  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3862 
3863  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3864  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3865  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3866  return *this;
3867 }
3868 
3869 Integer& Integer::operator>>=(size_t n)
3870 {
3871  const size_t wordCount = WordCount();
3872  const size_t shiftWords = n / WORD_BITS;
3873  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3874 
3875  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3876  if (wordCount > shiftWords)
3877  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3878  if (IsNegative() && WordCount()==0) // avoid -0
3879  *this = Zero();
3880  return *this;
3881 }
3882 
3883 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3884 {
3885  size_t aSize = RoundupSize(a.WordCount());
3886  size_t bSize = RoundupSize(b.WordCount());
3887 
3888  product.reg.CleanNew(RoundupSize(aSize+bSize));
3889  product.sign = Integer::POSITIVE;
3890 
3891  IntegerSecBlock workspace(aSize + bSize);
3892  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3893 }
3894 
3895 void Multiply(Integer &product, const Integer &a, const Integer &b)
3896 {
3897  PositiveMultiply(product, a, b);
3898 
3899  if (a.NotNegative() != b.NotNegative())
3900  product.Negate();
3901 }
3902 
3904 {
3905  Integer product;
3906  Multiply(product, *this, b);
3907  return product;
3908 }
3909 
3910 /*
3911 void PositiveDivide(Integer &remainder, Integer &quotient,
3912  const Integer &dividend, const Integer &divisor)
3913 {
3914  remainder.reg.CleanNew(divisor.reg.size());
3915  remainder.sign = Integer::POSITIVE;
3916  quotient.reg.New(0);
3917  quotient.sign = Integer::POSITIVE;
3918  unsigned i=dividend.BitCount();
3919  while (i--)
3920  {
3921  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3922  remainder.reg[0] |= dividend[i];
3923  if (overflow || remainder >= divisor)
3924  {
3925  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3926  quotient.SetBit(i);
3927  }
3928  }
3929 }
3930 */
3931 
3932 void PositiveDivide(Integer &remainder, Integer &quotient,
3933  const Integer &a, const Integer &b)
3934 {
3935  unsigned aSize = a.WordCount();
3936  unsigned bSize = b.WordCount();
3937 
3938  if (!bSize)
3939  throw Integer::DivideByZero();
3940 
3941  if (aSize < bSize)
3942  {
3943  remainder = a;
3944  remainder.sign = Integer::POSITIVE;
3945  quotient = Integer::Zero();
3946  return;
3947  }
3948 
3949  aSize += aSize%2; // round up to next even number
3950  bSize += bSize%2;
3951 
3952  remainder.reg.CleanNew(RoundupSize(bSize));
3953  remainder.sign = Integer::POSITIVE;
3954  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3955  quotient.sign = Integer::POSITIVE;
3956 
3957  IntegerSecBlock T(aSize+3*(bSize+2));
3958  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3959 }
3960 
3961 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3962 {
3963  PositiveDivide(remainder, quotient, dividend, divisor);
3964 
3965  if (dividend.IsNegative())
3966  {
3967  quotient.Negate();
3968  if (remainder.NotZero())
3969  {
3970  --quotient;
3971  remainder = divisor.AbsoluteValue() - remainder;
3972  }
3973  }
3974 
3975  if (divisor.IsNegative())
3976  quotient.Negate();
3977 }
3978 
3979 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3980 {
3981  q = a;
3982  q >>= n;
3983 
3984  const size_t wordCount = BitsToWords(n);
3985  if (wordCount <= a.WordCount())
3986  {
3987  r.reg.resize(RoundupSize(wordCount));
3988  CopyWords(r.reg, a.reg, wordCount);
3989  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
3990  if (n % WORD_BITS != 0)
3991  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
3992  }
3993  else
3994  {
3995  r.reg.resize(RoundupSize(a.WordCount()));
3996  CopyWords(r.reg, a.reg, r.reg.size());
3997  }
3998  r.sign = POSITIVE;
3999 
4000  if (a.IsNegative() && r.NotZero())
4001  {
4002  --q;
4003  r = Power2(n) - r;
4004  }
4005 }
4006 
4007 Integer Integer::DividedBy(const Integer &b) const
4008 {
4009  Integer remainder, quotient;
4010  Integer::Divide(remainder, quotient, *this, b);
4011  return quotient;
4012 }
4013 
4015 {
4016  Integer remainder, quotient;
4017  Integer::Divide(remainder, quotient, *this, b);
4018  return remainder;
4019 }
4020 
4021 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4022 {
4023  if (!divisor)
4024  throw Integer::DivideByZero();
4025 
4026  assert(divisor);
4027 
4028  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4029  {
4030  quotient = dividend >> (BitPrecision(divisor)-1);
4031  remainder = dividend.reg[0] & (divisor-1);
4032  return;
4033  }
4034 
4035  unsigned int i = dividend.WordCount();
4036  quotient.reg.CleanNew(RoundupSize(i));
4037  remainder = 0;
4038  while (i--)
4039  {
4040  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4041  remainder = DWord(dividend.reg[i], remainder) % divisor;
4042  }
4043 
4044  if (dividend.NotNegative())
4045  quotient.sign = POSITIVE;
4046  else
4047  {
4048  quotient.sign = NEGATIVE;
4049  if (remainder)
4050  {
4051  --quotient;
4052  remainder = divisor - remainder;
4053  }
4054  }
4055 }
4056 
4057 Integer Integer::DividedBy(word b) const
4058 {
4059  word remainder;
4060  Integer quotient;
4061  Integer::Divide(remainder, quotient, *this, b);
4062  return quotient;
4063 }
4064 
4065 word Integer::Modulo(word divisor) const
4066 {
4067  if (!divisor)
4068  throw Integer::DivideByZero();
4069 
4070  assert(divisor);
4071 
4072  word remainder;
4073 
4074  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4075  remainder = reg[0] & (divisor-1);
4076  else
4077  {
4078  unsigned int i = WordCount();
4079 
4080  if (divisor <= 5)
4081  {
4082  DWord sum(0, 0);
4083  while (i--)
4084  sum += reg[i];
4085  remainder = sum % divisor;
4086  }
4087  else
4088  {
4089  remainder = 0;
4090  while (i--)
4091  remainder = DWord(reg[i], remainder) % divisor;
4092  }
4093  }
4094 
4095  if (IsNegative() && remainder)
4096  remainder = divisor - remainder;
4097 
4098  return remainder;
4099 }
4100 
4102 {
4103  if (!!(*this)) // don't flip sign if *this==0
4104  sign = Sign(1-sign);
4105 }
4106 
4107 int Integer::PositiveCompare(const Integer& t) const
4108 {
4109  unsigned size = WordCount(), tSize = t.WordCount();
4110 
4111  if (size == tSize)
4112  return CryptoPP::Compare(reg, t.reg, size);
4113  else
4114  return size > tSize ? 1 : -1;
4115 }
4116 
4117 int Integer::Compare(const Integer& t) const
4118 {
4119  if (NotNegative())
4120  {
4121  if (t.NotNegative())
4122  return PositiveCompare(t);
4123  else
4124  return 1;
4125  }
4126  else
4127  {
4128  if (t.NotNegative())
4129  return -1;
4130  else
4131  return -PositiveCompare(t);
4132  }
4133 }
4134 
4136 {
4137  if (!IsPositive())
4138  return Zero();
4139 
4140  // overestimate square root
4141  Integer x, y = Power2((BitCount()+1)/2);
4142  assert(y*y >= *this);
4143 
4144  do
4145  {
4146  x = y;
4147  y = (x + *this/x) >> 1;
4148  } while (y<x);
4149 
4150  return x;
4151 }
4152 
4153 bool Integer::IsSquare() const
4154 {
4155  Integer r = SquareRoot();
4156  return *this == r.Squared();
4157 }
4158 
4159 bool Integer::IsUnit() const
4160 {
4161  return (WordCount() == 1) && (reg[0] == 1);
4162 }
4163 
4165 {
4166  return IsUnit() ? *this : Zero();
4167 }
4168 
4169 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4170 {
4171  return x*y%m;
4172 }
4173 
4174 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4175 {
4176  ModularArithmetic mr(m);
4177  return mr.Exponentiate(x, e);
4178 }
4179 
4181 {
4182  return EuclideanDomainOf<Integer>().Gcd(a, b);
4183 }
4184 
4186 {
4187  assert(m.NotNegative());
4188 
4189  if (IsNegative())
4190  return Modulo(m).InverseMod(m);
4191 
4192  if (m.IsEven())
4193  {
4194  if (!m || IsEven())
4195  return Zero(); // no inverse
4196  if (*this == One())
4197  return One();
4198 
4199  Integer u = m.Modulo(*this).InverseMod(*this);
4200  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4201  }
4202 
4203  SecBlock<word> T(m.reg.size() * 4);
4204  Integer r((word)0, m.reg.size());
4205  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4206  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4207  return r;
4208 }
4209 
4210 word Integer::InverseMod(word mod) const
4211 {
4212  word g0 = mod, g1 = *this % mod;
4213  word v0 = 0, v1 = 1;
4214  word y;
4215 
4216  while (g1)
4217  {
4218  if (g1 == 1)
4219  return v1;
4220  y = g0 / g1;
4221  g0 = g0 % g1;
4222  v0 += y * v1;
4223 
4224  if (!g0)
4225  break;
4226  if (g0 == 1)
4227  return mod-v0;
4228  y = g1 / g0;
4229  g1 = g1 % g0;
4230  v1 += y * v0;
4231  }
4232  return 0;
4233 }
4234 
4235 // ********************************************************
4236 
4238 {
4239  BERSequenceDecoder seq(bt);
4240  OID oid(seq);
4241  if (oid != ASN1::prime_field())
4242  BERDecodeError();
4243  m_modulus.BERDecode(seq);
4244  seq.MessageEnd();
4245  m_result.reg.resize(m_modulus.reg.size());
4246 }
4247 
4249 {
4250  DERSequenceEncoder seq(bt);
4251  ASN1::prime_field().DEREncode(seq);
4252  m_modulus.DEREncode(seq);
4253  seq.MessageEnd();
4254 }
4255 
4257 {
4258  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4259 }
4260 
4262 {
4263  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4264 }
4265 
4267 {
4268  if (a.reg.size()==m_modulus.reg.size())
4269  {
4270  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4271  return m_result;
4272  }
4273  else
4274  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4275 }
4276 
4277 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4278 {
4279  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4280  {
4281  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4282  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4283  {
4284  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4285  }
4286  return m_result;
4287  }
4288  else
4289  {
4290  m_result1 = a+b;
4291  if (m_result1 >= m_modulus)
4292  m_result1 -= m_modulus;
4293  return m_result1;
4294  }
4295 }
4296 
4298 {
4299  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4300  {
4301  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4302  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4303  {
4304  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4305  }
4306  }
4307  else
4308  {
4309  a+=b;
4310  if (a>=m_modulus)
4311  a-=m_modulus;
4312  }
4313 
4314  return a;
4315 }
4316 
4317 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4318 {
4319  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4320  {
4321  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4322  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4323  return m_result;
4324  }
4325  else
4326  {
4327  m_result1 = a-b;
4328  if (m_result1.IsNegative())
4329  m_result1 += m_modulus;
4330  return m_result1;
4331  }
4332 }
4333 
4335 {
4336  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4337  {
4338  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4339  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4340  }
4341  else
4342  {
4343  a-=b;
4344  if (a.IsNegative())
4345  a+=m_modulus;
4346  }
4347 
4348  return a;
4349 }
4350 
4352 {
4353  if (!a)
4354  return a;
4355 
4356  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4357  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4358  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4359 
4360  return m_result;
4361 }
4362 
4363 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4364 {
4365  if (m_modulus.IsOdd())
4366  {
4367  MontgomeryRepresentation dr(m_modulus);
4368  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4369  }
4370  else
4371  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4372 }
4373 
4374 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4375 {
4376  if (m_modulus.IsOdd())
4377  {
4378  MontgomeryRepresentation dr(m_modulus);
4379  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4380  for (unsigned int i=0; i<exponentsCount; i++)
4381  results[i] = dr.ConvertOut(results[i]);
4382  }
4383  else
4384  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4385 }
4386 
4388  : ModularArithmetic(m),
4389  m_u((word)0, m_modulus.reg.size()),
4390  m_workspace(5*m_modulus.reg.size())
4391 {
4392  if (!m_modulus.IsOdd())
4393  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4394 
4395  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4396 }
4397 
4399 {
4400  word *const T = m_workspace.begin();
4401  word *const R = m_result.reg.begin();
4402  const size_t N = m_modulus.reg.size();
4403  assert(a.reg.size()<=N && b.reg.size()<=N);
4404 
4405  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4406  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4407  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4408  return m_result;
4409 }
4410 
4412 {
4413  word *const T = m_workspace.begin();
4414  word *const R = m_result.reg.begin();
4415  const size_t N = m_modulus.reg.size();
4416  assert(a.reg.size()<=N);
4417 
4418  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4419  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4420  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4421  return m_result;
4422 }
4423 
4425 {
4426  word *const T = m_workspace.begin();
4427  word *const R = m_result.reg.begin();
4428  const size_t N = m_modulus.reg.size();
4429  assert(a.reg.size()<=N);
4430 
4431  CopyWords(T, a.reg, a.reg.size());
4432  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4433  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4434  return m_result;
4435 }
4436 
4438 {
4439 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4440  word *const T = m_workspace.begin();
4441  word *const R = m_result.reg.begin();
4442  const size_t N = m_modulus.reg.size();
4443  assert(a.reg.size()<=N);
4444 
4445  CopyWords(T, a.reg, a.reg.size());
4446  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4447  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4448  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4449 
4450 // cout << "k=" << k << " N*32=" << 32*N << endl;
4451 
4452  if (k>N*WORD_BITS)
4453  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4454  else
4455  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4456 
4457  return m_result;
4458 }
4459 
4460 // Specialization declared in misc.h to allow us to print integers
4461 // with additional control options, like arbirary bases and uppercase.
4462 template <> CRYPTOPP_DLL
4463 std::string IntToString<Integer>(Integer value, unsigned int base)
4464 {
4465  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4466  static const unsigned int BIT_32 = (1U << 31);
4467  const bool UPPER = !!(base & BIT_32);
4468  static const unsigned int BIT_31 = (1U << 30);
4469  const bool BASE = !!(base & BIT_31);
4470 
4471  const char CH = UPPER ? 'A' : 'a';
4472  base &= ~(BIT_32|BIT_31);
4473  assert(base >= 2 && base <= 32);
4474 
4475  if (value == 0)
4476  return "0";
4477 
4478  bool negative = false, zero = false;
4479  if (value.IsNegative())
4480  {
4481  negative = true;
4482  value.Negate();
4483  }
4484 
4485  if (!value)
4486  zero = true;
4487 
4488  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4489  Integer temp;
4490 
4491  unsigned int i=0;
4492  while (!!value)
4493  {
4494  word digit;
4495  Integer::Divide(digit, temp, value, word(base));
4496  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4497  value.swap(temp);
4498  }
4499 
4500  std::string result;
4501  result.reserve(i+2);
4502 
4503  if (negative)
4504  result += '-';
4505 
4506  if (zero)
4507  result += '0';
4508 
4509  while (i--)
4510  result += s[i];
4511 
4512  if (BASE)
4513  {
4514  if (base == 10)
4515  result += '.';
4516  else if (base == 16)
4517  result += 'h';
4518  else if (base == 8)
4519  result += 'o';
4520  else if (base == 2)
4521  result += 'b';
4522  }
4523 
4524  return result;
4525 }
4526 
4527 // Specialization declared in misc.h to avoid Coverity findings.
4528 template <> CRYPTOPP_DLL
4529 std::string IntToString<word64>(word64 value, unsigned int base)
4530 {
4531  // Hack... set the high bit for uppercase.
4532  static const unsigned int HIGH_BIT = (1U << 31);
4533  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4534  base &= ~HIGH_BIT;
4535 
4536  assert(base >= 2);
4537  if (value == 0)
4538  return "0";
4539 
4540  std::string result;
4541  while (value > 0)
4542  {
4543  word64 digit = value % base;
4544  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4545  value /= base;
4546  }
4547  return result;
4548 }
4549 
4550 NAMESPACE_END
4551 
4552 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:182
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4248
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4363
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4411
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4437
a number which is probabilistically prime
Definition: integer.h:91
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:123
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:264
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:329
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3033
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:660
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3328
an unsigned value
Definition: integer.h:81
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4334
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:538
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:348
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:442
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:330
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:740
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4180
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:705
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:760
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4317
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:623
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:689
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:344
Secure memory block with allocator and cleanup.
Definition: secblock.h:437
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3253
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3403
Signedness
Used when importing and exporting integers.
Definition: integer.h:79
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:524
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:291
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3055
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4387
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3979
Ring of congruence classes modulo n.
Definition: modarith.h:34
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1186
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:750
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3417
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4529
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3316
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3063
the value is negative
Definition: integer.h:73
SecBlock<byte> typedef.
Definition: secblock.h:731
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:294
Interface for buffered transformations.
Definition: cryptlib.h:1352
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3372
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2909
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4164
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3006
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:371
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4277
byte order is little-endian
Definition: cryptlib.h:125
Sign
Used internally to represent the integer.
Definition: integer.h:69
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4153
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3388
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3267
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4261
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4159
Copy input to a memory buffer.
Definition: filters.h:1016
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4135
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:338
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:335
a number with no special properties
Definition: integer.h:89
Integer Squared() const
Definition: integer.h:496
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1378
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:332
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3093
Integer()
Creates the zero integer.
Definition: integer.cpp:2869
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:670
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2923
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:278
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4101
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:728
Integer Times(const Integer &b) const
Definition: integer.cpp:3903
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3379
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1054
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2985
Multiple precision integer with arithmetic operations.
Definition: integer.h:45
a signed value
Definition: integer.h:83
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3011
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4424
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:557
RandomNumberType
Properties of a random integer.
Definition: integer.h:87
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4351
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:127
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:467
String-based implementation of Store interface.
Definition: filters.h:1066
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3961
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:47
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:61
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3041
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:754
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:499
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4297
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation&#39;s attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4266
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3350
Exception thrown when division by 0 is encountered.
Definition: integer.h:51
DER Sequence Encoder.
Definition: asn.h:304
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:976
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:59
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:271
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:236
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4398
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3458
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4185
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3276
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:718
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:500
Multiple precision integer with arithmetic operations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4256
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3001
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:477
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:673
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3357
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4463
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3070
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:519
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:538
BER General Decoder.
Definition: asn.h:234
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4374
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4117
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:158
Integer Modulo(const Integer &b) const
Definition: integer.cpp:4014
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:645
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3258
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
Definition: algebra.cpp:316
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:71
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:335
Interface for retrieving values given their names.
Definition: cryptlib.h:277