libstdc++
opt_random.h
Go to the documentation of this file.
00001 // Optimizations for random number functions, x86 version -*- C++ -*-
00002 
00003 // Copyright (C) 2012-2016 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/opt_random.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{random}
00028  */
00029 
00030 #ifndef _BITS_OPT_RANDOM_H
00031 #define _BITS_OPT_RANDOM_H 1
00032 
00033 #ifdef __SSE3__
00034 #include <pmmintrin.h>
00035 #endif
00036 
00037 
00038 #pragma GCC system_header
00039 
00040 
00041 namespace std _GLIBCXX_VISIBILITY(default)
00042 {
00043 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00044 
00045 #ifdef __SSE3__
00046   template<>
00047     template<typename _UniformRandomNumberGenerator>
00048       void
00049       normal_distribution<double>::
00050       __generate(typename normal_distribution<double>::result_type* __f,
00051                  typename normal_distribution<double>::result_type* __t,
00052                  _UniformRandomNumberGenerator& __urng,
00053                  const param_type& __param)
00054       {
00055         typedef uint64_t __uctype;
00056 
00057         if (__f == __t)
00058           return;
00059 
00060         if (_M_saved_available)
00061           {
00062             _M_saved_available = false;
00063             *__f++ = _M_saved * __param.stddev() + __param.mean();
00064 
00065             if (__f == __t)
00066               return;
00067           }
00068 
00069         constexpr uint64_t __maskval = 0xfffffffffffffull;
00070         static const __m128i __mask = _mm_set1_epi64x(__maskval);
00071         static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
00072         static const __m128d __three = _mm_set1_pd(3.0);
00073         const __m128d __av = _mm_set1_pd(__param.mean());
00074 
00075         const __uctype __urngmin = __urng.min();
00076         const __uctype __urngmax = __urng.max();
00077         const __uctype __urngrange = __urngmax - __urngmin;
00078         const __uctype __uerngrange = __urngrange + 1;
00079 
00080         while (__f + 1 < __t)
00081           {
00082             double __le;
00083             __m128d __x;
00084             do
00085               {
00086                 union
00087                 {
00088                   __m128i __i;
00089                   __m128d __d;
00090                 } __v;
00091 
00092                 if (__urngrange > __maskval)
00093                   {
00094                     if (__detail::_Power_of_2(__uerngrange))
00095                       __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
00096                                                              __urng()),
00097                                               __mask);
00098                     else
00099                       {
00100                         const __uctype __uerange = __maskval + 1;
00101                         const __uctype __scaling = __urngrange / __uerange;
00102                         const __uctype __past = __uerange * __scaling;
00103                         uint64_t __v1;
00104                         do
00105                           __v1 = __uctype(__urng()) - __urngmin;
00106                         while (__v1 >= __past);
00107                         __v1 /= __scaling;
00108                         uint64_t __v2;
00109                         do
00110                           __v2 = __uctype(__urng()) - __urngmin;
00111                         while (__v2 >= __past);
00112                         __v2 /= __scaling;
00113 
00114                         __v.__i = _mm_set_epi64x(__v1, __v2);
00115                       }
00116                   }
00117                 else if (__urngrange == __maskval)
00118                   __v.__i = _mm_set_epi64x(__urng(), __urng());
00119                 else if ((__urngrange + 2) * __urngrange >= __maskval
00120                          && __detail::_Power_of_2(__uerngrange))
00121                   {
00122                     uint64_t __v1 = __urng() * __uerngrange + __urng();
00123                     uint64_t __v2 = __urng() * __uerngrange + __urng();
00124 
00125                     __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
00126                                             __mask);
00127                   }
00128                 else
00129                   {
00130                     size_t __nrng = 2;
00131                     __uctype __high = __maskval / __uerngrange / __uerngrange;
00132                     while (__high > __uerngrange)
00133                       {
00134                         ++__nrng;
00135                         __high /= __uerngrange;
00136                       }
00137                     const __uctype __highrange = __high + 1;
00138                     const __uctype __scaling = __urngrange / __highrange;
00139                     const __uctype __past = __highrange * __scaling;
00140                     __uctype __tmp;
00141 
00142                     uint64_t __v1;
00143                     do
00144                       {
00145                         do
00146                           __tmp = __uctype(__urng()) - __urngmin;
00147                         while (__tmp >= __past);
00148                         __v1 = __tmp / __scaling;
00149                         for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
00150                           {
00151                             __tmp = __v1;
00152                             __v1 *= __uerngrange;
00153                             __v1 += __uctype(__urng()) - __urngmin;
00154                           }
00155                       }
00156                     while (__v1 > __maskval || __v1 < __tmp);
00157 
00158                     uint64_t __v2;
00159                     do
00160                       {
00161                         do
00162                           __tmp = __uctype(__urng()) - __urngmin;
00163                         while (__tmp >= __past);
00164                         __v2 = __tmp / __scaling;
00165                         for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
00166                           {
00167                             __tmp = __v2;
00168                             __v2 *= __uerngrange;
00169                             __v2 += __uctype(__urng()) - __urngmin;
00170                           }
00171                       }
00172                     while (__v2 > __maskval || __v2 < __tmp);
00173                     
00174                     __v.__i = _mm_set_epi64x(__v1, __v2);
00175                   }
00176 
00177                 __v.__i = _mm_or_si128(__v.__i, __two);
00178                 __x = _mm_sub_pd(__v.__d, __three);
00179                 __m128d __m = _mm_mul_pd(__x, __x);
00180                 __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
00181               }
00182             while (__le == 0.0 || __le >= 1.0);
00183 
00184             double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
00185                              * __param.stddev());
00186 
00187             __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
00188 
00189             _mm_storeu_pd(__f, __x);
00190             __f += 2;
00191           }
00192 
00193         if (__f != __t)
00194           {
00195             result_type __x, __y, __r2;
00196 
00197             __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
00198               __aurng(__urng);
00199 
00200             do
00201               {
00202                 __x = result_type(2.0) * __aurng() - 1.0;
00203                 __y = result_type(2.0) * __aurng() - 1.0;
00204                 __r2 = __x * __x + __y * __y;
00205               }
00206             while (__r2 > 1.0 || __r2 == 0.0);
00207 
00208             const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
00209             _M_saved = __x * __mult;
00210             _M_saved_available = true;
00211             *__f = __y * __mult * __param.stddev() + __param.mean();
00212           }
00213       }
00214 #endif
00215 
00216 
00217 _GLIBCXX_END_NAMESPACE_VERSION
00218 } // namespace
00219 
00220 
00221 #endif // _BITS_OPT_RANDOM_H