CORSIKA  @c8_version@
The framework to simulate particle cascades for astroparticle physics
boxmuller.hpp
1 /*
2 Copyright 2010-2011, D. E. Shaw Research.
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9 * Redistributions of source code must retain the above copyright
10  notice, this list of conditions, and the following disclaimer.
11 
12 * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions, and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16 * Neither the name of D. E. Shaw Research nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 // This file implements the Box-Muller method for generating gaussian
34 // random variables (GRVs). Box-Muller has the advantage of
35 // deterministically requiring exactly two uniform random variables as
36 // input and producing exactly two GRVs as output, which makes it
37 // especially well-suited to the counter-based generators in
38 // Random123. Other methods (e.g., Ziggurat, polar) require an
39 // indeterminate number of inputs for each output and so require a
40 // 'MicroURNG' to be used with Random123. The down side of Box-Muller
41 // is that it calls sincos, log and sqrt, which may be slow. However,
42 // on GPUs, these functions are remarkably fast, which makes
43 // Box-Muller the fastest GRV generator we know of on GPUs.
44 //
45 // This file exports two structs and one overloaded function,
46 // all in the r123 namespace:
47 // struct r123::float2{ float x,y; }
48 // struct r123::double2{ double x,y; }
49 //
50 // r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1);
51 // r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
52 //
53 // float2 and double2 are identical to their synonymous global-
54 // namespace structures in CUDA.
55 //
56 // This file may not be as portable, and has not been tested as
57 // rigorously as other files in the library, e.g., the generators.
58 // Nevertheless, we hope it is useful and we encourage developers to
59 // copy it and modify it for their own use. We invite comments and
60 // improvements.
61 
62 #ifndef _r123_BOXMULLER_HPP__
63 #define _r123_BOXMULLER_HPP__
64 
65 #include <Random123/features/compilerfeatures.h>
66 #include <Random123/uniform.hpp>
67 #include <math.h>
68 
69 namespace random_iterator_r123 {
70 
71 #if !defined(__CUDACC__)
72  typedef struct {
73  float x, y;
74  } float2;
75  typedef struct {
76  double x, y;
77  } double2;
78 #else
79  typedef ::float2 float2;
80  typedef ::double2 double2;
81 #endif
82 
83 #if !defined(RANDOM_ITERATOR_R123_NO_SINCOS) && defined(__APPLE__)
84 /* MacOS X 10.10.5 (2015) doesn't have sincosf */
85 #define RANDOM_ITERATOR_R123_NO_SINCOS 1
86 #endif
87 
88 #if RANDOM_ITERATOR_R123_NO_SINCOS /* enable this if sincos and sincosf are not in the \
89  math library */
90  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincosf(
91  float x, float* s, float* c) {
92  *s = sinf(x);
93  *c = cosf(x);
94  }
95 
96  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincos(
97  double x, double* s, double* c) {
98  *s = sin(x);
99  *c = cos(x);
100  }
101 #endif /* sincos is not in the math library */
102 
103 #if !defined(CUDART_VERSION) || \
104  CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
105 
106  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospif(
107  float x, float* s, float* c) {
108  const float PIf = 3.1415926535897932f;
109  sincosf(PIf * x, s, c);
110  }
111 
112  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospi(
113  double x, double* s, double* c) {
114  const double PI = 3.1415926535897932;
115  sincos(PI * x, s, c);
116  }
117 #endif /* sincospi is not in math lib */
118 
119  /*
120  * take two 32bit unsigned random values and return a float2 with
121  * two random floats in a normal distribution via a Box-Muller transform
122  */
123  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE float2
124  boxmuller(uint32_t u0, uint32_t u1) {
125  float r;
126  float2 f;
127  sincospif(uneg11<float>(u0), &f.x, &f.y);
128  r = sqrtf(-2.f * logf(u01<float>(u1))); // u01 is guaranteed to avoid 0.
129  f.x *= r;
130  f.y *= r;
131  return f;
132  }
133 
134  /*
135  * take two 64bit unsigned random values and return a double2 with
136  * two random doubles in a normal distribution via a Box-Muller transform
137  */
138  RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE double2
139  boxmuller(uint64_t u0, uint64_t u1) {
140  double r;
141  double2 f;
142 
143  sincospi(uneg11<double>(u0), &f.x, &f.y);
144  r = sqrt(-2. * log(u01<double>(u1))); // u01 is guaranteed to avoid 0.
145  f.x *= r;
146  f.y *= r;
147  return f;
148  }
149 } // namespace random_iterator_r123
150 
151 #endif /* BOXMULLER_H__ */
detail::Root< D, 2, X > constexpr sqrt(quantity< D, X > const &x)
square root.
Definition: quantity.hpp:678