1 /* 2 * Copyright Nick Thompson, 2018 3 * Use, modification and distribution are subject to the 4 * Boost Software License, Version 1.0. (See accompanying file 5 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 */ 7 #ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP 8 #define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP 9 #include <sstream> 10 #include <algorithm> 11 #include <vector> 12 #include <boost/atomic.hpp> 13 #include <functional> 14 #include <future> 15 #include <thread> 16 #include <initializer_list> 17 #include <utility> 18 #include <random> 19 #include <chrono> 20 #include <map> 21 #include <boost/math/policies/error_handling.hpp> 22 23 namespace boost { namespace math { namespace quadrature { 24 25 namespace detail { 26 enum class limit_classification {FINITE, 27 LOWER_BOUND_INFINITE, 28 UPPER_BOUND_INFINITE, 29 DOUBLE_INFINITE}; 30 } 31 32 template<class Real, class F, class RandomNumberGenerator = std::mt19937_64, class Policy = boost::math::policies::policy<>> 33 class naive_monte_carlo 34 { 35 public: naive_monte_carlo(const F & integrand,std::vector<std::pair<Real,Real>> const & bounds,Real error_goal,bool singular=true,uint64_t threads=std::thread::hardware_concurrency (),uint64_t seed=0)36 naive_monte_carlo(const F& integrand, 37 std::vector<std::pair<Real, Real>> const & bounds, 38 Real error_goal, 39 bool singular = true, 40 uint64_t threads = std::thread::hardware_concurrency(), 41 uint64_t seed = 0): m_num_threads{threads}, m_seed{seed} 42 { 43 using std::numeric_limits; 44 using std::sqrt; 45 uint64_t n = bounds.size(); 46 m_lbs.resize(n); 47 m_dxs.resize(n); 48 m_limit_types.resize(n); 49 m_volume = 1; 50 static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>"; 51 for (uint64_t i = 0; i < n; ++i) 52 { 53 if (bounds[i].second <= bounds[i].first) 54 { 55 boost::math::policies::raise_domain_error(function, "The upper bound is <= the lower bound.\n", bounds[i].second, Policy()); 56 return; 57 } 58 if (bounds[i].first == -numeric_limits<Real>::infinity()) 59 { 60 if (bounds[i].second == numeric_limits<Real>::infinity()) 61 { 62 m_limit_types[i] = detail::limit_classification::DOUBLE_INFINITE; 63 } 64 else 65 { 66 m_limit_types[i] = detail::limit_classification::LOWER_BOUND_INFINITE; 67 // Ok ok this is bad to use the second bound as the lower limit and then reflect. 68 m_lbs[i] = bounds[i].second; 69 m_dxs[i] = numeric_limits<Real>::quiet_NaN(); 70 } 71 } 72 else if (bounds[i].second == numeric_limits<Real>::infinity()) 73 { 74 m_limit_types[i] = detail::limit_classification::UPPER_BOUND_INFINITE; 75 if (singular) 76 { 77 // I've found that it's easier to sample on a closed set and perturb the boundary 78 // than to try to sample very close to the boundary. 79 m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)()); 80 } 81 else 82 { 83 m_lbs[i] = bounds[i].first; 84 } 85 m_dxs[i] = numeric_limits<Real>::quiet_NaN(); 86 } 87 else 88 { 89 m_limit_types[i] = detail::limit_classification::FINITE; 90 if (singular) 91 { 92 if (bounds[i].first == 0) 93 { 94 m_lbs[i] = std::numeric_limits<Real>::epsilon(); 95 } 96 else 97 { 98 m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)()); 99 } 100 101 m_dxs[i] = std::nextafter(bounds[i].second, std::numeric_limits<Real>::lowest()) - m_lbs[i]; 102 } 103 else 104 { 105 m_lbs[i] = bounds[i].first; 106 m_dxs[i] = bounds[i].second - bounds[i].first; 107 } 108 m_volume *= m_dxs[i]; 109 } 110 } 111 112 m_integrand = [this, &integrand](std::vector<Real> & x)->Real __anonb75f970f0102(std::vector<Real> & x)113 { 114 Real coeff = m_volume; 115 for (uint64_t i = 0; i < x.size(); ++i) 116 { 117 // Variable transformation are listed at: 118 // https://en.wikipedia.org/wiki/Numerical_integration 119 // However, we've made some changes to these so that we can evaluate on a compact domain. 120 if (m_limit_types[i] == detail::limit_classification::FINITE) 121 { 122 x[i] = m_lbs[i] + x[i]*m_dxs[i]; 123 } 124 else if (m_limit_types[i] == detail::limit_classification::UPPER_BOUND_INFINITE) 125 { 126 Real t = x[i]; 127 Real z = 1/(1 + numeric_limits<Real>::epsilon() - t); 128 coeff *= (z*z)*(1 + numeric_limits<Real>::epsilon()); 129 x[i] = m_lbs[i] + t*z; 130 } 131 else if (m_limit_types[i] == detail::limit_classification::LOWER_BOUND_INFINITE) 132 { 133 Real t = x[i]; 134 Real z = 1/(t+sqrt((numeric_limits<Real>::min)())); 135 coeff *= (z*z); 136 x[i] = m_lbs[i] + (t-1)*z; 137 } 138 else 139 { 140 Real t1 = 1/(1+numeric_limits<Real>::epsilon() - x[i]); 141 Real t2 = 1/(x[i]+numeric_limits<Real>::epsilon()); 142 x[i] = (2*x[i]-1)*t1*t2/4; 143 coeff *= (t1*t1+t2*t2)/4; 144 } 145 } 146 return coeff*integrand(x); 147 }; 148 149 // If we don't do a single function call in the constructor, 150 // we can't do a restart. 151 std::vector<Real> x(m_lbs.size()); 152 153 // If the seed is zero, that tells us to choose a random seed for the user: 154 if (seed == 0) 155 { 156 std::random_device rd; 157 seed = rd(); 158 } 159 160 RandomNumberGenerator gen(seed); 161 Real inv_denom = 1/static_cast<Real>(((gen.max)()-(gen.min)())); 162 163 m_num_threads = (std::max)(m_num_threads, (uint64_t) 1); 164 m_thread_calls.reset(new boost::atomic<uint64_t>[threads]); 165 m_thread_Ss.reset(new boost::atomic<Real>[threads]); 166 m_thread_averages.reset(new boost::atomic<Real>[threads]); 167 168 Real avg = 0; 169 for (uint64_t i = 0; i < m_num_threads; ++i) 170 { 171 for (uint64_t j = 0; j < m_lbs.size(); ++j) 172 { 173 x[j] = (gen()-(gen.min)())*inv_denom; 174 } 175 Real y = m_integrand(x); 176 m_thread_averages[i] = y; // relaxed store 177 m_thread_calls[i] = 1; 178 m_thread_Ss[i] = 0; 179 avg += y; 180 } 181 avg /= m_num_threads; 182 m_avg = avg; // relaxed store 183 184 m_error_goal = error_goal; // relaxed store 185 m_start = std::chrono::system_clock::now(); 186 m_done = false; // relaxed store 187 m_total_calls = m_num_threads; // relaxed store 188 m_variance = (numeric_limits<Real>::max)(); 189 } 190 integrate()191 std::future<Real> integrate() 192 { 193 // Set done to false in case we wish to restart: 194 m_done.store(false); // relaxed store, no worker threads yet 195 m_start = std::chrono::system_clock::now(); 196 return std::async(std::launch::async, 197 &naive_monte_carlo::m_integrate, this); 198 } 199 cancel()200 void cancel() 201 { 202 // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same. 203 // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing. 204 m_seed = m_seed*m_seed; 205 m_done = true; // relaxed store, worker threads will get the message eventually 206 // Make sure the error goal is infinite, because otherwise we'll loop when we do the final error goal check: 207 m_error_goal = (std::numeric_limits<Real>::max)(); 208 } 209 variance() const210 Real variance() const 211 { 212 return m_variance.load(); 213 } 214 current_error_estimate() const215 Real current_error_estimate() const 216 { 217 using std::sqrt; 218 // 219 // There is a bug here: m_variance and m_total_calls get updated asynchronously 220 // and may be out of synch when we compute the error estimate, not sure if it matters though... 221 // 222 return sqrt(m_variance.load()/m_total_calls.load()); 223 } 224 estimated_time_to_completion() const225 std::chrono::duration<Real> estimated_time_to_completion() const 226 { 227 auto now = std::chrono::system_clock::now(); 228 std::chrono::duration<Real> elapsed_seconds = now - m_start; 229 Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load 230 if (r*r <= 1) { 231 return 0*elapsed_seconds; 232 } 233 return (r*r - 1)*elapsed_seconds; 234 } 235 update_target_error(Real new_target_error)236 void update_target_error(Real new_target_error) 237 { 238 m_error_goal = new_target_error; // relaxed store 239 } 240 progress() const241 Real progress() const 242 { 243 Real r = m_error_goal.load()/this->current_error_estimate(); // relaxed load 244 if (r*r >= 1) 245 { 246 return 1; 247 } 248 return r*r; 249 } 250 current_estimate() const251 Real current_estimate() const 252 { 253 return m_avg.load(); 254 } 255 calls() const256 uint64_t calls() const 257 { 258 return m_total_calls.load(); // relaxed load 259 } 260 261 private: 262 m_integrate()263 Real m_integrate() 264 { 265 uint64_t seed; 266 // If the user tells us to pick a seed, pick a seed: 267 if (m_seed == 0) 268 { 269 std::random_device rd; 270 seed = rd(); 271 } 272 else // use the seed we are given: 273 { 274 seed = m_seed; 275 } 276 RandomNumberGenerator gen(seed); 277 int max_repeat_tries = 5; 278 do{ 279 280 if (max_repeat_tries < 5) 281 { 282 m_done = false; 283 284 #ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES 285 std::cout << "Failed to achieve required tolerance first time through..\n"; 286 std::cout << " variance = " << m_variance << std::endl; 287 std::cout << " average = " << m_avg << std::endl; 288 std::cout << " total calls = " << m_total_calls << std::endl; 289 290 for (std::size_t i = 0; i < m_num_threads; ++i) 291 std::cout << " thread_calls[" << i << "] = " << m_thread_calls[i] << std::endl; 292 for (std::size_t i = 0; i < m_num_threads; ++i) 293 std::cout << " thread_averages[" << i << "] = " << m_thread_averages[i] << std::endl; 294 for (std::size_t i = 0; i < m_num_threads; ++i) 295 std::cout << " thread_Ss[" << i << "] = " << m_thread_Ss[i] << std::endl; 296 #endif 297 } 298 299 std::vector<std::thread> threads(m_num_threads); 300 for (uint64_t i = 0; i < threads.size(); ++i) 301 { 302 threads[i] = std::thread(&naive_monte_carlo::m_thread_monte, this, i, gen()); 303 } 304 do { 305 std::this_thread::sleep_for(std::chrono::milliseconds(100)); 306 uint64_t total_calls = 0; 307 for (uint64_t i = 0; i < m_num_threads; ++i) 308 { 309 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume); 310 total_calls += t_calls; 311 } 312 Real variance = 0; 313 Real avg = 0; 314 for (uint64_t i = 0; i < m_num_threads; ++i) 315 { 316 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume); 317 // Will this overflow? Not hard to remove . . . 318 avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls); 319 variance += m_thread_Ss[i].load(boost::memory_order::relaxed); 320 } 321 m_avg.store(avg, boost::memory_order::release); 322 m_variance.store(variance / (total_calls - 1), boost::memory_order::release); 323 m_total_calls = total_calls; // relaxed store, it's just for user feedback 324 // Allow cancellation: 325 if (m_done) // relaxed load 326 { 327 break; 328 } 329 } while (m_total_calls < 2048 || this->current_error_estimate() > m_error_goal.load(boost::memory_order::consume)); 330 // Error bound met; signal the threads: 331 m_done = true; // relaxed store, threads will get the message in the end 332 std::for_each(threads.begin(), threads.end(), 333 std::mem_fn(&std::thread::join)); 334 if (m_exception) 335 { 336 std::rethrow_exception(m_exception); 337 } 338 // Incorporate their work into the final estimate: 339 uint64_t total_calls = 0; 340 for (uint64_t i = 0; i < m_num_threads; ++i) 341 { 342 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume); 343 total_calls += t_calls; 344 } 345 Real variance = 0; 346 Real avg = 0; 347 348 for (uint64_t i = 0; i < m_num_threads; ++i) 349 { 350 uint64_t t_calls = m_thread_calls[i].load(boost::memory_order::consume); 351 // Averages weighted by the number of calls the thread made: 352 avg += m_thread_averages[i].load(boost::memory_order::relaxed)*((Real)t_calls / (Real)total_calls); 353 variance += m_thread_Ss[i].load(boost::memory_order::relaxed); 354 } 355 m_avg.store(avg, boost::memory_order::release); 356 m_variance.store(variance / (total_calls - 1), boost::memory_order::release); 357 m_total_calls = total_calls; // relaxed store, this is just user feedback 358 359 // Sometimes, the master will observe the variance at a very "good" (or bad?) moment, 360 // Then the threads proceed to find the variance is much greater by the time they hear the message to stop. 361 // This *WOULD* make sure that the final error estimate is within the error bounds. 362 } 363 while ((--max_repeat_tries >= 0) && (this->current_error_estimate() > m_error_goal)); 364 365 return m_avg.load(boost::memory_order::consume); 366 } 367 m_thread_monte(uint64_t thread_index,uint64_t seed)368 void m_thread_monte(uint64_t thread_index, uint64_t seed) 369 { 370 using std::numeric_limits; 371 try 372 { 373 std::vector<Real> x(m_lbs.size()); 374 RandomNumberGenerator gen(seed); 375 Real inv_denom = (Real) 1/(Real)( (gen.max)() - (gen.min)() ); 376 Real M1 = m_thread_averages[thread_index].load(boost::memory_order::consume); 377 Real S = m_thread_Ss[thread_index].load(boost::memory_order::consume); 378 // Kahan summation is required or the value of the integrand will go on a random walk during long computations. 379 // See the implementation discussion. 380 // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges! 381 // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware) 382 Real compensator = 0; 383 uint64_t k = m_thread_calls[thread_index].load(boost::memory_order::consume); 384 while (!m_done) // relaxed load 385 { 386 int j = 0; 387 // If we don't have a certain number of calls before an update, we can easily terminate prematurely 388 // because the variance estimate is way too low. This magic number is a reasonable compromise, as 1/sqrt(2048) = 0.02, 389 // so it should recover 2 digits if the integrand isn't poorly behaved, and if it is, it should discover that before premature termination. 390 // Of course if the user has 64 threads, then this number is probably excessive. 391 int magic_calls_before_update = 2048; 392 while (j++ < magic_calls_before_update) 393 { 394 for (uint64_t i = 0; i < m_lbs.size(); ++i) 395 { 396 x[i] = (gen() - (gen.min)())*inv_denom; 397 } 398 Real f = m_integrand(x); 399 using std::isfinite; 400 if (!isfinite(f)) 401 { 402 // The call to m_integrand transform x, so this error message states the correct node. 403 std::stringstream os; 404 os << "Your integrand was evaluated at {"; 405 for (uint64_t i = 0; i < x.size() -1; ++i) 406 { 407 os << x[i] << ", "; 408 } 409 os << x[x.size() -1] << "}, and returned " << f << std::endl; 410 static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>"; 411 boost::math::policies::raise_domain_error(function, os.str().c_str(), /*this is a dummy arg to make it compile*/ 7.2, Policy()); 412 } 413 ++k; 414 Real term = (f - M1)/k; 415 Real y1 = term - compensator; 416 Real M2 = M1 + y1; 417 compensator = (M2 - M1) - y1; 418 S += (f - M1)*(f - M2); 419 M1 = M2; 420 } 421 m_thread_averages[thread_index].store(M1, boost::memory_order::release); 422 m_thread_Ss[thread_index].store(S, boost::memory_order::release); 423 m_thread_calls[thread_index].store(k, boost::memory_order::release); 424 } 425 } 426 catch (...) 427 { 428 // Signal the other threads that the computation is ruined: 429 m_done = true; // relaxed store 430 m_exception = std::current_exception(); 431 } 432 } 433 434 std::function<Real(std::vector<Real> &)> m_integrand; 435 uint64_t m_num_threads; 436 uint64_t m_seed; 437 boost::atomic<Real> m_error_goal; 438 boost::atomic<bool> m_done; 439 std::vector<Real> m_lbs; 440 std::vector<Real> m_dxs; 441 std::vector<detail::limit_classification> m_limit_types; 442 Real m_volume; 443 boost::atomic<uint64_t> m_total_calls; 444 // I wanted these to be vectors rather than maps, 445 // but you can't resize a vector of atomics. 446 std::unique_ptr<boost::atomic<uint64_t>[]> m_thread_calls; 447 boost::atomic<Real> m_variance; 448 std::unique_ptr<boost::atomic<Real>[]> m_thread_Ss; 449 boost::atomic<Real> m_avg; 450 std::unique_ptr<boost::atomic<Real>[]> m_thread_averages; 451 std::chrono::time_point<std::chrono::system_clock> m_start; 452 std::exception_ptr m_exception; 453 }; 454 455 }}} 456 #endif 457