diff --git a/.gitignore b/.gitignore index 9e27eddf76..e1d8210b59 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ inspect.html test/cuda *.DS_Store /**/*.dSYM/ +build/* +*.json + diff --git a/doc/math.qbk b/doc/math.qbk index 847b33e6eb..4b843c39db 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -1,13 +1,13 @@ [book Math Toolkit [quickbook 1.7] - [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] + [copyright 2006-2020 Nikhar Agrawal, Anton Bikineev, Matthew Borland, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos, Hubert Holin, Bruno Lalande, John Maddock, Evan Miller, Jeremy Murphy, Matthew Pulver, Johan Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg, Daryle Walker and Xiaogang Zhang] [/purpose ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22] [license Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at [@http://www.boost.org/LICENSE_1_0.txt]) ] - [authors [Agrawal, Nikhar], [Bikineev, Anton], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] + [authors [Agrawal, Nikhar], [Bikineev, Anton], [Borland, Matthew], [Bristow, Paul A.], [Holin, Hubert], [Guazzone, Marco], [Kormanyos, Christopher], [Lalande, Bruno], [Maddock, John], [Miller, Evan], [Murphy, Jeremy W.], [Pulver, Matthew], [Råde, Johan], [Sobotta, Benjamin], [Sewani, Gautam], [Thompson, Nicholas], [van den Berg, Thijs], [Walker, Daryle], [Zhang, Xiaogang]] [/last-revision $Date$] [version 2.12.0] ] diff --git a/doc/sf/number_series.qbk b/doc/sf/number_series.qbk index 970b3c7937..98ab09873c 100644 --- a/doc/sf/number_series.qbk +++ b/doc/sf/number_series.qbk @@ -259,9 +259,86 @@ Passing a value greater than `max_prime` results in a __domain_error being raise This function is `constexpr` only if the compiler supports C++14 constexpr functions. -[endsect] [/section:primes] +[endsect] [/section:primes Prime Numbers] -[endsect] [/Number Series] +[section:prime_sieve Prime Sieve] + +[h4 Synopsis] + +`` +#include +`` + +namespace boost { namespace math { + + template + void prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &resultant_primes) + + template + void prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &resultant_primes) + + template + void prime_sieve(Integer upper_bound, Container &resultant_primes) + + template + void prime_range(Integer lower_bound, Integer upper_bound, Container &resultant_primes) + + template + constexpr void prime_reserve(Integer upper_bound, std::vector &prime_container) + + +}} // End namespaces + +[h4 Description] + +There are two sets of prime sieveing functions available: `prime_sieve` and `prime_range`. +`prime_sieve` will return all primes in the range [2, `upper_bound`). +`prime_range` will return all primes in the range [lower_bound, upper_bound). +Both `prime_sieve` and `prime_range` can perform arbitrary precision calculations using `boost::multiprecision::cpp_int` or `boost::multiprecision::mpz_int`. + +`prime_sieve` and `prime_range` both have a parameter for an execution policy. +The policies `std::execution::par` or `std::execution::par_unseq` will enable internal multi-threading. +All other policies or no policy will result in the `prime_sieve` and `prime_range` executing sequentially. + +`prime_reserve` uses the prime number theorem to reserve for approximately the correct number of primes given `upper_bound`. + +[h4 Examples] + // To reserve space and calculate primes [2, 1,000,000) in parallel + std::vector primes; + boost::math::prime_reserve(1'000'000, primes); + boost::math::prime_sieve(std::execution::par, 1'000'000, primes); + + // To calculate primes [100, 1,000) sequentially + std::vector primes; + boost::math::prime_range(100, 1'000, primes); + +[h4 Complexity] +These functions were tested for complexity using [@https://github.com/google/benchmark google benchmark] on a system with the following specifications: +* CPU: Intel i5-10500 +* OS: Red Hat Enterprise Linux 8.2 +* Compiler: gcc-g++ 10.2.0 (Compiled from source with GMP 6.2.0, MPC 1.2.0, and MPFR 4.1.0 using RHEL gcc-toolset-9) +* Compiler flags: -Ofast -MMD -march=native -g -lbenchmark -lpthread -lgmp + +Range of test upper_bound: 2[super 1] - 2[super 30] + +[pre''' +[table:id Complexity Calculations `bigo[](N)` + [[Type] [Time Complexity] [Time Complexity RMS] [CPU Complexity] [CPU Complexity RMS]] + [[int32_t] [0.72N] [5%] [0.02N] [13%]] + [[int64_t] [0.78N] [16%] [0.03N] [6%]] + [[uint32_t] [0.71N] [10%] [0.02N] [14%]] + [[cpp_int] [6.24N] [44%] [0.35N] [4%]] + [[mpz_int] [9.27N] [11%] [0.69N] [8%]] +] +'''] + +[h4 References] +* Sorensen, Jonathan [@https://research.cs.wisc.edu/techreports/1990/TR909.pdf An Introduction to Prime Number Sieves] +* Gries, David and Misra, Jayadev [@https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf A Linear Sieve Algorithm for Finding Prime Numbers] + +[endsect] [/section:primes Prime Sieve] + +[endsect] [/section:number_series Number Series] [/ Copyright 2013, 2014 Nikhar Agrawal, Christopher Kormanyos, John Maddock, Paul A. Bristow. diff --git a/include/boost/math/special_functions/detail/interval_prime_sieve.hpp b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp new file mode 100644 index 0000000000..ee815fe7d6 --- /dev/null +++ b/include/boost/math/special_functions/detail/interval_prime_sieve.hpp @@ -0,0 +1,337 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class IntervalSieve +{ + +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. +using int_128t = unsigned __int128; // One machine word smaller than the boost equivalent +#else +using int_128t = boost::multiprecision::uint128_t; +#endif + +private: + // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) + // This table is from page 421, table 16.3.1, Hugh Williams' book + // Last 8 entries added from Wooding's MS thesis, 2003, pp. 92-93 + struct pssentry + { + static constexpr std::size_t len {49}; + static constexpr std::array prime + { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 67, 71, 79, 83, 101, 103, 107, 113, 131, 149, 157, + 173, 181, 193, 197, 211, 227, 229, 233, 239, 241, 251, 257, 263, 277, 281, 283, 293, 311, 331, 337, 347, 353 + }; + + static constexpr std::array ps + { + 73, 241, 1'009, 2'641, 8'089, 18'001, 53'881, 87'481, 117'049, 515'761, 1'083'289, 3'206'641, 3'818'929, + 9'257'329, 22'000'801, 48'473'881, 175'244'281, 427'733'329, 898'716'289, 2'805'544'681, 10'310'263'441, + 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, + 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, + 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689u, + + (static_cast(0x3uLL) << 64) | 0xc956f827e0524359uLL, // 69'848'288'320'900'186'969 + (static_cast(0xbuLL) << 64) | 0x539315b3b1268d59uLL, // 208'936'365'799'044'975'961 + (static_cast(0x1cuLL) << 64) | 0xec87d86ca60b50a1uLL, // 533'552'663'339'828'203'681 + (static_cast(0x32uLL) << 64) | 0xc6d3496f20db3d81uLL, // 936'664'079'266'714'697'089 + (static_cast(0x74uLL) << 64) | 0x210967a12ba94be1uLL, // 2'142'202'860'370'269'916'129 + (static_cast(0x2e3uLL) << 64) | 0xec11ddc09fd65c51uLL, // 13'649'154'491'558'298'803'281 + (static_cast(0x753uLL) << 64) | 0x641c14b397c27bf1uLL, // 34'594'858'801'670'127'778'801 + (static_cast(0x1511uLL) << 64) | 0x85fdf38d1fc9ce21uLL, // 99'492'945'930'479'213'334'049 + (static_cast(0x3e8buLL) << 64) | 0xaba417e222ca5091uLL // 295'363'187'400'900'310'880'401 + }; + }; + + static constexpr pssentry pss_{}; + static constexpr boost::math::detail::prime_sieve::MOD210Wheel w_{}; + std::size_t tdlimit_; + + Integer delta_; + Integer left_; + Integer right_; + + OutputIterator resultant_primes_; + + // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations + boost::dynamic_bitset<> b_; + + std::vector primes_; + std::int_fast64_t plimit_; + + void Settdlimit() noexcept; + void SeiveLength(const Integer d) noexcept; + void Sieve() noexcept; + bool Psstest(const std::size_t pos) noexcept; + void Psstestall() noexcept; + decltype(auto) WriteOutput() noexcept; + +public: + IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right) noexcept; + decltype(auto) NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept; +}; + +template +void IntervalSieve::Settdlimit() noexcept +{ + const double dr {static_cast(right_)}; + const double delta {static_cast(delta_)}; + const double tdest {delta * std::log(dr)}; + + // Small cases + if(tdest * tdest >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + return; + } + + // First guess + if(tdest <= 1ul<<30) + { + tdlimit_ = static_cast(tdest); + } + + else + { + tdlimit_ = 1ul<<30; + } + + // Find the corresponding prime + std::size_t i; + for(i = pss_.len - 1; i > 0; --i) + { + if(static_cast(pss_.ps[i]) * tdlimit_ < dr) + { + break; + } + } + plimit_ = pss_.prime[i]; + + double tdlimit_guess = 1 + std::fmod(dr, static_cast(pss_.ps[i])); + if(tdlimit_guess * tdlimit_guess >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + } +} + +template +void IntervalSieve::SeiveLength(const Integer d) noexcept +{ + Integer r {left_ % d}; + Integer start {0}; + + if(r != 0) + { + start = d - r; + } + + for(Integer i {start}; i >= 0 && i < b_.size(); i += d) + { + b_[static_cast(i)] = 0; + } +} + +template +void IntervalSieve::Sieve() noexcept +{ + std::int_fast64_t primes_range {}; + if(plimit_ <= 10) + { + primes_range = 10; + } + + else + { + primes_range = plimit_; + } + + // Sieve with pre-computed (or small) primes and then use the wheel for the remainder + std::size_t i {}; + Integer j; + if(plimit_ <= pss_.prime.back()) + { + SeiveLength(static_cast(2)); + for(; pss_.prime[i] < primes_range; ++i) + { + SeiveLength(pss_.prime[i]); + } + + j = w_.Next(pss_.prime[--i]); + } + + else + { + primes_.resize(static_cast(prime_approximation(right_))); + linear_sieve(primes_range, primes_.begin()); + + for(; primes_[i] < primes_range; ++i) + { + SeiveLength(primes_[i]); + } + + j = w_.Next(primes_[--i]); + } + + for(; j <= tdlimit_; j = w_.Next(j)) + { + SeiveLength(j); + } +} + +template +decltype(auto) IntervalSieve::WriteOutput() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + *resultant_primes_++ = left_ + i; + } + } + return resultant_primes_; +} + +// Performs the pseduosqaure prime test on n = left + pos +// return 1 if prime or prime power, 0 otherwise +// Begins with a base-2 test +template +bool IntervalSieve::Psstest(const std::size_t pos) noexcept +{ + const Integer n {static_cast(left_ + pos)}; + const Integer exponent {(n - 1) / 2}; + const std::int_fast64_t nmod8 = static_cast(n % 8); + + std::int_fast64_t negative_one_count {}; + + for(std::size_t i {}; i < primes_.size(); ++i) + { + Integer temp = primes_[i]; + temp = static_cast(std::pow(static_cast(temp), static_cast(n))); + + if(temp == 1) + { + if(i == 0 && nmod8 == 5) + { + return false; + } + } + + else + { + ++temp; + if(temp == n) + { + if(i > 0) + { + ++negative_one_count; + } + } + else + { + return false; + } + } + } + + return (nmod8 != 1 || negative_one_count > 0); +} + +template +void IntervalSieve::Psstestall() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + if(!Psstest(i)) + { + b_[i] = 0; + } + } + } +} + +template +IntervalSieve::IntervalSieve(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept : + left_ {left}, right_ {right}, resultant_primes_ {resultant_primes} +{ + delta_ = right_ - left_; + b_.resize(static_cast(delta_), 1); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + WriteOutput(); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + return WriteOutput(); +} + +template +decltype(auto) IntervalSieve::NewRange(const Integer left, const Integer right, OutputIterator resultant_primes) noexcept +{ + resultant_primes_ = resultant_primes; + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + return WriteOutput(); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_INTERVAL_SIEVE_HPP diff --git a/include/boost/math/special_functions/detail/linear_prime_sieve.hpp b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp new file mode 100644 index 0000000000..96176309c0 --- /dev/null +++ b/include/boost/math/special_functions/detail/linear_prime_sieve.hpp @@ -0,0 +1,162 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +decltype(auto) linear_sieve(const Integer upper_bound, OutputIterator resultant_primes) +{ + const std::size_t masks_size {static_cast(upper_bound / 2 + 1)}; + std::unique_ptr masks {new bool[masks_size]}; + memset(masks.get(), true, sizeof(*masks.get()) * (masks_size)); + + *resultant_primes++ = 2; + + for(std::size_t index {1}; index < masks_size; ++index) + { + if(masks[index]) + { + *resultant_primes++ = static_cast(2 * index + 1); + for(std::size_t clear {index * 3 + 1}; clear < masks_size; clear += index * 2 + 1) + { + masks[clear] = false; + } + } + } + return resultant_primes; +} + +// 4'096 is where benchmarked performance of linear_sieve begins to diverge +template +static const Integer linear_sieve_limit = Integer(4'096); // Constexpr does not work with boost::multiprecision types + +// Stepanov Sieve - From Mathematics to Generic Programming Chap 3 +template +void mark_sieve(RandomAccessIterator first, RandomAccessIterator last, Integer factor) +{ + *first = false; + while(last - first > factor) + { + first = first + factor; + *first = false; + } +} + +template +inline void mark_sieve(Bitset& bits, const Integer factor) +{ + for(Integer i {factor * factor}; i < bits.size(); i += factor) + { + bits[static_cast(i)] = 0; + } +} + +template +void sift(RandomAccessIterator first, Integer n) +{ + const auto last {std::next(first, static_cast(n))}; + std::fill(first, last, true); + Integer i {0}; + Integer index_square {3}; + Integer factor {3}; + + for(; index_square < n; index_square += factor + factor - 2) + { + if(first[i]) + { + mark_sieve(first + index_square, last, factor); + } + + ++i; + factor += 2; + } +} + +// TODO(mborland): Pass in a more efficient data structure (likely dynamic_bitset) to sift and post-process +template +inline decltype(auto) stepanov_sieve(Integer upper_bound, OutputIterator resultant_primes) +{ + if(upper_bound == 2) + { + return resultant_primes; + } + + sift(resultant_primes, upper_bound); + return resultant_primes; +} + +// TODO(mborland): Pass in execution policy. mark_sieve can readily be converted to std::for_each, but dynamic_bitset would need replaced with something +// that has iterators +template +decltype(auto) wheel_sieve_of_eratosthenes(const Integer upper_bound, OutputIterator resultant_primes) +{ + if(upper_bound == 2) + { + *resultant_primes++ = static_cast(2); + return resultant_primes; + } + + const Integer sqrt_upper_bound {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + boost::dynamic_bitset<> trial(static_cast(upper_bound)); + trial.set(); + std::array primes {2, 3, 5}; // Wheel basis + std::array wheel {7, 11, 13, 17, 19, 23, 29, 31}; // MOD 30 wheel + const Integer wheel_mod {30}; + + for(std::size_t i {}; i < primes.size(); ++i) + { + mark_sieve(trial, primes[i]); + *resultant_primes++ = primes[i]; + } + + // Last value in the wheel is the starting point for the next step + for(std::size_t i {}; i < wheel.size(); ++i) + { + mark_sieve(trial, wheel[i]); + *resultant_primes++ = wheel[i]; + } + + Integer i {wheel_mod}; + for(; (i + wheel.front()) < sqrt_upper_bound; i += wheel_mod) + { + for(std::size_t j {}; j < wheel.size(); ++j) + { + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + mark_sieve(trial, spoke); + *resultant_primes++ = std::move(spoke); + } + } + } + + for(; (i + wheel.front()) < upper_bound; i += wheel_mod) + { + for(std::size_t j {}; j < wheel.size(); ++j) + { + Integer spoke {i + wheel[j]}; + if(trial[static_cast(spoke)]) + { + *resultant_primes++ = std::move(spoke); + } + } + } + + return resultant_primes; +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LINEAR_PRIME_SIEVE_HPP diff --git a/include/boost/math/special_functions/detail/prime_wheel.hpp b/include/boost/math/special_functions/detail/prime_wheel.hpp new file mode 100644 index 0000000000..3b8ec9a70d --- /dev/null +++ b/include/boost/math/special_functions/detail/prime_wheel.hpp @@ -0,0 +1,293 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +template +class Wheel +{ +private: + struct Wheelrec + { + std::int_fast32_t rp; + std::int_fast32_t dist; + std::int_fast32_t pos; + std::int_fast32_t inv; + }; + + std::unique_ptr W_; + Integer M_; + Integer k_; + Integer phi_; + + static constexpr std::array P_ {2, 3, 5, 7, 11, 13, 17, 19}; + + void build(Integer korsize); + +public: + Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; + explicit Wheel(Integer korsize) { build(korsize); } + explicit Wheel(const Wheel &x) { build(x.K()); } + + constexpr bool operator!() const noexcept { return W_ == nullptr; } + constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } + const Wheel& operator=(const Wheel &x) + { + if(this != &x) + { + build(x.K()); + } + return *this; + } + + constexpr Integer Size() const noexcept { return M_; } + constexpr Integer K() const noexcept { return k_; } + constexpr Integer Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } + constexpr Integer MakeRP(const Integer i) const noexcept + { + if(W_[i % M_].rp) + { + return i; + } + return Next(i); + } + constexpr Integer Prev(const Integer i) const noexcept { return i - W_[(M_ - (i % M_)) % M_].dist; } + constexpr Integer Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr Integer Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + + void Print(); +}; + +template +void Wheel::build(Integer korsize) +{ + // Calculate k_ and M_ + if(korsize >= 10) + { + --korsize; + for(k_ = 0; korsize > 0; ++k_) + { + korsize /= P_[k_]; + } + } + else + { + k_ = korsize; + } + + Integer i {0}; + Integer dist {0}; + Integer pos {1}; + + for(M_ = 1; i < k_; ++i) + { + M_ *= P_[i]; + } + + W_ = std::make_unique(M_); + + // Compute the RP field + for(i = 0; i < M_; ++i) + { + W_[i].rp = 1; + } + + for(i = 0; i < k_; ++i) + { + for(Integer j {0}; j < M_; j += P_[i]) + { + W_[j].rp = 0; + } + } + + // Compute the dist field + W_[M_- 1].dist = 2; + for(i = M_ - 2; i >= 0; --i) + { + W_[i].dist = ++dist; + if(W_[i].rp) + { + dist = 0; + } + } + + // Copute pos and inv fields + for(i = 0; i < M_; ++i) + { + W_[i].inv = 0; + if(W_[i].rp) + { + W_[pos].inv = i; + W_[i].pos = pos++; + } + else + { + W_[i].pos = 0; + } + + } + + W_[0].inv = -1; + phi_ = W_[M_- 1].pos; +} + +template +void Wheel::Print() +{ + std::int_fast32_t i {}; + std::cout << "Wheel size = " << this->Size() + << "\nk = " << this->K() + << "\nphi(M) = " << this->Phi() << std::endl; + + // Verify size + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << i << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nRP Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].rp << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nDist Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].dist << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nPos Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].pos << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nInv Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << W_[i].inv << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + std::cout << std::endl; +} + +// Pre-computed MOD 210 wheel +template +class MOD210Wheel final +{ +private: + static constexpr auto M_ {210}; + static constexpr auto k_ {4}; + static constexpr auto phi_ {28}; + + static constexpr std::array rp_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 + }; + + static constexpr std::array inv_ + { + -1, 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, + 107, 109, 113, 121, 127, 131, 137, 139, 143, 149, 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + static constexpr std::array pos_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 5, 0, 0, 0, 6, 0, + 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, 0, 12, 0, 0, + 0, 0, 0, 13, 0, 0, 0, 0, 0, 14, 0, 15, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 23, 0, 24, 0, 0, 0, 25, 0, 26, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 30, 0, 0, 0, 0, 0, 31, 0, 32, 0, 0, 0, 33, 0, 0, 0, 0, 0, 34, + 0, 35, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 0, 0, 38, 0, 39, 0, 0, 0, 40, 0, + 0, 0, 0, 0, 41, 0, 42, 0, 0, 0, 0, 0, 43, 0, 0, 0, 44, 0, 45, 0, 0, 0, 46, 0, 47, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48 + }; + + static constexpr std::array dist_ + { + 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 6, 5, + 4, 3, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, + 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, + 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, + 1, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 2 + }; + +public: + constexpr MOD210Wheel() = default; + ~MOD210Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr auto Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } + constexpr auto MakeRP(const Integer i) const noexcept + { + if(rp_[i % M_]) + { + return i; + } + return Next(i); + } + constexpr auto Prev(const Integer i) const noexcept { return i - dist_[static_cast((M_ - (i % M_)) % M_)]; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[static_cast(i % M_)]; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[static_cast(i % phi_)]; } +}; +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_PRIME_WHEEL_HPP diff --git a/include/boost/math/special_functions/interval_sieve.hpp b/include/boost/math/special_functions/interval_sieve.hpp new file mode 100644 index 0000000000..172f2b4f2f --- /dev/null +++ b/include/boost/math/special_functions/interval_sieve.hpp @@ -0,0 +1,298 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail +{ +// boost::multiprecision and POD +template +inline double get_double(const Integer &x) noexcept +{ + return static_cast(x); +} + +template +class IntervalSieve +{ + +#ifdef BOOST_HAS_INT128 // Defined in GCC 4.6+, clang, intel. MSVC does not define. +using int_128t = unsigned __int128; // One machine word smaller than the boost equivalent +#else +using int_128t = boost::multiprecision::uint128_t; +#endif + +private: + // Table of pseudo-sqares (https://mathworld.wolfram.com/Pseudosquare.html) + // This table is from page 421, table 16.3.1, Hugh Williams' book + // Last 8 entries added from Wooding's MS thesis, 2003, pp. 92-93 + struct pssentry + { + static constexpr std::size_t len {49}; + static constexpr std::array prime + { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 67, 71, 79, 83, 101, 103, 107, 113, 131, 149, 157, + 173, 181, 193, 197, 211, 227, 229, 233, 239, 241, 251, 257, 263, 277, 281, 283, 293, 311, 331, 337, 347, 353 + }; + + static constexpr std::array ps + { + 73, 241, 1'009, 2'641, 8'089, 18'001, 53'881, 87'481, 117'049, 515'761, 1'083'289, 3'206'641, 3'818'929, + 9'257'329, 22'000'801, 48'473'881, 175'244'281, 427'733'329, 898'716'289, 2'805'544'681, 10'310'263'441, + 23'616'331'489, 85'157'610'409, 196'265'095'009, 2'871'842'842'801, 26'250'887'023'729, 112'434'732'901'969, + 178'936'222'537'081, 696'161'110'209'049, 2'854'909'648'103'881, 6'450'045'516'630'769, 11'641'399'247'947'921, + 190'621'428'905'186'449, 196'640'148'121'928'601, 712'624'335'095'093'521, 1'773'855'791'877'850'321, + 2'327'687'064'124'474'441, 6'384'991'873'059'836'689, 8'019'204'661'305'419'761, 10'198'100'582'046'287'689u, + + (static_cast(0x3uLL) << 64) | 0xc956f827e0524359uLL, // 69'848'288'320'900'186'969 + (static_cast(0xbuLL) << 64) | 0x539315b3b1268d59uLL, // 208'936'365'799'044'975'961 + (static_cast(0x1cuLL) << 64) | 0xec87d86ca60b50a1uLL, // 533'552'663'339'828'203'681 + (static_cast(0x32uLL) << 64) | 0xc6d3496f20db3d81uLL, // 936'664'079'266'714'697'089 + (static_cast(0x74uLL) << 64) | 0x210967a12ba94be1uLL, // 2'142'202'860'370'269'916'129 + (static_cast(0x2e3uLL) << 64) | 0xec11ddc09fd65c51uLL, // 13'649'154'491'558'298'803'281 + (static_cast(0x753uLL) << 64) | 0x641c14b397c27bf1uLL, // 34'594'858'801'670'127'778'801 + (static_cast(0x1511uLL) << 64) | 0x85fdf38d1fc9ce21uLL, // 99'492'945'930'479'213'334'049 + (static_cast(0x3e8buLL) << 64) | 0xaba417e222ca5091uLL // 295'363'187'400'900'310'880'401 + }; + }; + + static constexpr pssentry pss_{}; + static constexpr boost::math::detail::MOD210Wheel w_{}; + std::size_t tdlimit_; + + Integer delta_; + Integer left_; + Integer right_; + + // https://www.researchgate.net/publication/220803585_Performance_of_C_bit-vector_implementations + boost::dynamic_bitset<> b_; + + const PrimeContainer& primes_; + std::int_fast64_t plimit_; + + void Settdlimit() noexcept; + void SeiveLength(const Integer d) noexcept; + void Sieve() noexcept; + bool Psstest(const std::size_t pos) noexcept; + void Psstestall() noexcept; + void WriteOutput(Container &resultant_primes) noexcept; + +public: + IntervalSieve(const Integer left, const Integer right, const PrimeContainer &primes, Container &resultant_primes) noexcept; + void NewRange(const Integer left, const Integer right, Container &resultant_primes) noexcept; +}; + +template +void IntervalSieve::Settdlimit() noexcept +{ + const double dr = get_double(right_); + const double delta = get_double(delta_); + const double tdest = delta * std::log(dr); + + // Small cases + if(tdest * tdest >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + return; + } + + // First guess + if(tdest <= 1ul<<30) + { + tdlimit_ = static_cast(tdest); + } + + else + { + tdlimit_ = 1ul<<30; + } + + // Find the corresponding prime + std::size_t i; + for(i = pss_.len - 1; i > 0; --i) + { + if(static_cast(pss_.ps[i]) * tdlimit_ < dr) + { + break; + } + } + plimit_ = pss_.prime[i]; + + double tdlimit_guess = 1 + std::fmod(dr, static_cast(pss_.ps[i])); + if(tdlimit_guess * tdlimit_guess >= dr) + { + tdlimit_ = static_cast(std::sqrt(dr)); + plimit_ = 0; + } +} + +template +void IntervalSieve::SeiveLength(const Integer d) noexcept +{ + Integer r {left_ % d}; + Integer start {0}; + + if(r != 0) + { + start = d - r; + } + + for(Integer i {start}; i >= 0 && i < b_.size(); i += d) + { + b_[static_cast(i)] = 0; + } +} + +template +void IntervalSieve::Sieve() noexcept +{ + std::int_fast64_t primes_range {}; + if(plimit_ <= 10) + { + primes_range = 10; + } + + else + { + primes_range = plimit_; + } + + // Sieve with pre-computed primes and then use the wheel for the remainder + std::size_t i {}; + for(; primes_[i] < primes_range; ++i) + { + SeiveLength(primes_[i]); + } + + for(Integer j = w_.Next(primes_[--i]); j <= tdlimit_; j = w_.Next(j)) + { + SeiveLength(j); + } +} + +template +void IntervalSieve::WriteOutput(Container &resultant_primes) noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + resultant_primes.emplace_back(left_ + i); + } + } +} + +// Performs the pseduosqaure prime test on n = left + pos +// return 1 if prime or prime power, 0 otherwise +// Begins with a base-2 test +template +bool IntervalSieve::Psstest(const std::size_t pos) noexcept +{ + const Integer n {static_cast(left_ + pos)}; + const Integer exponent {(n - 1) / 2}; + const std::int_fast64_t nmod8 = static_cast(n % 8); + + std::int_fast64_t negative_one_count {0}; + + for(std::size_t i {}; i < primes_.size(); ++i) + { + Integer temp = primes_[i]; + temp = static_cast(std::pow(get_double(temp), get_double(n))); + + if(temp == 1) + { + if(i == 0 && nmod8 == 5) + { + return false; + } + } + + else + { + ++temp; + if(temp == n) + { + if(i > 0) + { + ++negative_one_count; + } + } + else + { + return false; + } + } + } + + return (nmod8 != 1 || negative_one_count > 0); +} + +template +void IntervalSieve::Psstestall() noexcept +{ + for(std::size_t i {}; i < b_.size(); ++i) + { + if(b_[i]) + { + if(!Psstest(i)) + { + b_[i] = 0; + } + } + } +} + +template +IntervalSieve::IntervalSieve(const Integer left, const Integer right, const PrimeContainer &primes, Container &resultant_primes) noexcept : + left_ {left}, right_ {right}, primes_ {primes} +{ + delta_ = right_ - left_; + b_.resize(static_cast(delta_), 1); + Settdlimit(); + Sieve(); + + if(plimit_ != 0 ) + { + Psstestall(); + } + + WriteOutput(resultant_primes); +} + +template +void IntervalSieve::NewRange(const Integer left, const Integer right, Container &resultant_primes) noexcept +{ + left_ = left; + right_ = right; + delta_ = right_ - left_; + + b_.resize(static_cast(delta_)); + b_.set(); + Settdlimit(); + Sieve(); + + if(plimit_ != 0) + { + Psstestall(); + } + + WriteOutput(resultant_primes); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_INTERVAL_SIEVE_HPP diff --git a/include/boost/math/special_functions/prime_approximation.hpp b/include/boost/math/special_functions/prime_approximation.hpp new file mode 100644 index 0000000000..5da12819a2 --- /dev/null +++ b/include/boost/math/special_functions/prime_approximation.hpp @@ -0,0 +1,36 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP + +namespace boost::math +{ +template +constexpr Integer prime_approximation(const Integer upper_bound) noexcept +{ + auto c = 30 * ::log(113) / 113; // Magic numbers from wikipedia + return static_cast(::floor(c * static_cast(upper_bound) / ::log(static_cast(upper_bound)))); +} + +template +constexpr Integer prime_approximation(const Integer lower_bound, const Integer upper_bound) noexcept +{ + return prime_approximation(upper_bound) - prime_approximation(lower_bound); +} + +template +inline void prime_reserve(Integer upper_bound, std::vector& prime_container) +{ + prime_container.reserve(static_cast(prime_approximation(upper_bound))); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_APPROXIMATION_HPP diff --git a/include/boost/math/special_functions/prime_sieve.hpp b/include/boost/math/special_functions/prime_sieve.hpp new file mode 100644 index 0000000000..fe70dbe7f0 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve.hpp @@ -0,0 +1,495 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math { namespace detail +{ +// https://mathworld.wolfram.com/SieveofEratosthenes.html +// https://www.cs.utexas.edu/users/misra/scannedPdf.dir/linearSieve.pdf +template +void linear_sieve(Integer upper_bound, Container &resultant_primes) +{ + std::size_t least_divisors_size{static_cast(upper_bound + 1)}; + std::unique_ptr least_divisors{new Integer[least_divisors_size]{0}}; + + for (std::size_t i{2}; i < upper_bound; ++i) + { + if (least_divisors[i] == 0) + { + least_divisors[i] = i; + resultant_primes.emplace_back(i); + } + + for (std::size_t j{}; j < resultant_primes.size() && i * resultant_primes[j] <= upper_bound && + resultant_primes[j] <= least_divisors[i] && j < least_divisors_size; ++j) + { + least_divisors[i * static_cast(resultant_primes[j])] = resultant_primes[j]; + } + } +} + +// 4096 is where benchmarked performance of linear_sieve begins to diverge +template +const Integer linear_sieve_limit = Integer(4096); // Constexpr does not work with boost::multiprecision types + +template +void mask_sieve(Integer lower_bound, Integer upper_bound, const PrimeContainer& primes, Container &resultant_primes) +{ + Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + + std::size_t primes_size {}; + auto it{primes.begin()}; + while(it != primes.end() && *it < limit) + { + ++primes_size; + ++it; + } + + const std::size_t n {static_cast(upper_bound - lower_bound + 1)}; + std::unique_ptr is_prime {new bool[n]}; + memset(is_prime.get(), true, sizeof(*is_prime.get()) * (n)); + + // Enable use of thread pool, not SIMD compatible + std::for_each(std::execution::par, primes.begin(), it, [&is_prime, lower_bound, upper_bound](auto prime){ + for(Integer j {std::max(static_cast(prime * prime), static_cast((lower_bound + prime - 1) / prime * prime))}; + j < upper_bound; j += prime) + { + is_prime[static_cast(j - lower_bound)] = false; + } + }); + + if(lower_bound == 1) + { + is_prime[0] = false; + } + + for(Integer i{lower_bound}; i <= upper_bound; ++i) + { + if(is_prime[static_cast(i - lower_bound)]) + { + resultant_primes.emplace_back(i); + } + } +} + +template +void mask_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) +{ + auto limit{std::floor(std::sqrt(static_cast(upper_bound))) + 1}; + std::vector primes {}; + primes.reserve(limit / std::log(limit)); + + boost::math::detail::linear_sieve(limit, primes); + + boost::math::detail::mask_sieve(lower_bound, upper_bound, primes, resultant_primes); +} + +template +constexpr void prime_table(std::size_t min_index, Integer upper_bound, Container &resultant_primes) +{ + std::size_t current_index {min_index}; + Integer current_prime {2}; + + while(current_prime < upper_bound) + { + resultant_primes.emplace_back(current_prime); + ++current_index; + current_prime = prime(current_index); + } +} + +template +constexpr void prime_table(Integer upper_bound, Container &resultant_primes) +{ + prime_table(0, upper_bound, resultant_primes); +} + +template +void segmented_sieve(Integer lower_bound, Integer upper_bound, const PrimesContainer &primes, Container &resultant_primes) +{ + const Integer L1_SIZE {32768}; + const Integer interval {L1_SIZE * 8}; + Integer current_lower_bound{lower_bound}; + Integer current_upper_bound{current_lower_bound + interval}; + + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + std::vector> prime_vectors(ranges + 1); + std::vector> future_manager(ranges); + + auto primes_in_range {static_cast(static_cast(current_upper_bound) / std::log(static_cast(current_upper_bound)) - + static_cast(current_lower_bound) / std::log(static_cast(current_lower_bound)))}; + + for(std::size_t i {}; i < ranges; ++i) + { + prime_vectors[i].reserve(primes_in_range); + + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &primes, &prime_vectors, i]{ + boost::math::detail::IntervalSieve sieve(current_lower_bound, current_upper_bound, primes, prime_vectors[i]); + })); + + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + } + + prime_vectors[ranges].reserve(primes_in_range); + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &primes, &prime_vectors]{ + boost::math::detail::IntervalSieve sieve(current_lower_bound, upper_bound, primes, prime_vectors.back()); + })); + + for(auto &&future : future_manager) + { + if(future.valid()) + { + future.get(); + } + } + + for(auto &v : prime_vectors) + { + resultant_primes.insert(resultant_primes.end(), v.begin(), v.end()); + } +} + +template +void segmented_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) +{ + using boost::math::detail::linear_sieve_limit; + + Integer limit{static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + std::vector primes {}; + primes.reserve(static_cast(limit) / std::log(static_cast(limit))); + + // Prepare for max value so you do not have to calculate this again + if(limit < linear_sieve_limit) + { + boost::math::detail::linear_sieve(static_cast(limit), primes); + } + + else + { + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::segmented_sieve(linear_sieve_limit, limit, primes, primes); + } + + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes, resultant_primes); +} + +template +void sequential_segmented_sieve(Integer lower_bound, Integer upper_bound, Container &resultant_primes) +{ + const Integer L1_SIZE {32768}; + const Integer interval {L1_SIZE * 8}; + Integer current_lower_bound{lower_bound}; + Integer current_upper_bound{current_lower_bound + interval}; + + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + boost::math::detail::IntervalSieve sieve(current_lower_bound, current_upper_bound, resultant_primes, resultant_primes); + if(ranges == 0) + { + return; + } + + for(std::size_t i {}; i < ranges; ++i) + { + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + sieve.NewRange(current_lower_bound, current_upper_bound, resultant_primes); + } +} + +// SFINAE for dual interface +template +class is_container +{ + typedef char yes; + struct no { char x[2]; }; + + template static yes test( decltype(&U::size) ); + template static no test(...); + +public: + enum { value = sizeof(test(0)) == sizeof(char) }; +}; + +template +void prime_sieve_impl(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) +{ + using boost::math::detail::linear_sieve_limit; + + if(upper_bound == 2) + { + return; + } + + if(upper_bound <= linear_sieve_limit) + { + boost::math::detail::linear_sieve(static_cast(upper_bound), primes); + } + + else if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || + std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, upper_bound, primes); + } + + else + { + std::vector small_primes {}; + small_primes.reserve(1028); + + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); + }); + std::thread t2([upper_bound, &primes] { + boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), upper_bound, primes); + }); + + t1.join(); + t2.join(); + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } +} + +template +void prime_sieve_impl(Integer upper_bound, Container &primes) +{ + prime_sieve_impl(std::execution::seq, upper_bound, primes); +} + +template +inline decltype(auto) prime_sieve_wrapper(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator resultant_primes) +{ + std::vector primes; + prime_reserve(upper_bound, primes); + prime_sieve_impl(policy, upper_bound, primes); + + return std::move(primes.begin(), primes.end(), resultant_primes); +} + +template +inline decltype(auto) prime_sieve_wrapper(Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_sieve_wrapper(std::execution::seq, upper_bound, resultant_primes); +} + +template +void prime_range_impl(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, Container &primes) +{ + using boost::math::detail::linear_sieve_limit; + Integer limit {static_cast(std::floor(std::sqrt(static_cast(upper_bound)))) + 1}; + + if(upper_bound == 2) + { + return; + } + + if(upper_bound <= linear_sieve_limit) + { + boost::math::detail::linear_sieve(static_cast(upper_bound), primes); + } + + else if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + if(limit <= linear_sieve_limit) + { + boost::math::detail::linear_sieve(limit, primes); + + if(lower_bound <= limit) + { + boost::math::detail::sequential_segmented_sieve(limit, upper_bound, primes); + } + else + { + boost::math::detail::sequential_segmented_sieve(lower_bound, upper_bound, primes); + } + + } + + else + { + boost::math::detail::linear_sieve(linear_sieve_limit, primes); + boost::math::detail::sequential_segmented_sieve(linear_sieve_limit, limit, primes); + boost::math::detail::sequential_segmented_sieve(lower_bound, upper_bound, primes); + } + } + + else + { + std::vector small_primes {}; + + if(limit <= static_cast(linear_sieve_limit * 2)) + { + small_primes.reserve(1028); + + std::thread t1([limit, &small_primes] { + boost::math::detail::linear_sieve(limit, small_primes); + }); + + std::thread t2([lower_bound, limit, upper_bound, &primes] { + if(lower_bound <= limit) + { + boost::math::detail::segmented_sieve(limit, upper_bound, primes); + } + else + { + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes); + } + + }); + + t1.join(); + t2.join(); + + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + } + + else + { + boost::math::prime_reserve(limit, small_primes); + + std::thread t1([&small_primes] { + boost::math::detail::linear_sieve(static_cast(linear_sieve_limit * 2), small_primes); + }); + + std::thread t2([limit, &primes] { + boost::math::detail::segmented_sieve(static_cast(linear_sieve_limit * 2), limit, primes); + }); + + t1.join(); + t2.join(); + + primes.insert(primes.begin(), small_primes.begin(), small_primes.end()); + + boost::math::detail::segmented_sieve(lower_bound, upper_bound, primes); + } + } + + auto it{primes.begin()}; + while(*it < lower_bound && it != primes.end()) + { + ++it; + } + + primes.erase(primes.begin(), it); +} + +template +inline void prime_range_impl(Integer lower_bound, Integer upper_bound, Container &primes) +{ + prime_range_impl(std::execution::seq, lower_bound, upper_bound, primes); +} + +template +inline decltype(auto) prime_range_wrapper(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, OutputIterator resultant_primes) +{ + std::vector primes; + prime_reserve(lower_bound, upper_bound, primes); + prime_range_impl(policy, lower_bound, upper_bound, primes); + + return std::move(primes.begin(), primes.end(), resultant_primes); +} + +template +inline decltype(auto) prime_range_wrapper(Integer lower_bound, Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_range_wrapper(std::execution::seq, lower_bound, upper_bound, resultant_primes); +} + +} // End namespace detail + + + + + +template +inline decltype(auto) prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, T output) +{ + if constexpr (detail::is_container>::value) + { + detail::prime_sieve_impl(policy, upper_bound, *output); + return; + } + + else + { + detail::prime_sieve_wrapper(policy, upper_bound, output); + return output; + } +} + +template +inline decltype(auto) prime_sieve(Integer upper_bound, T output) +{ + return prime_sieve(std::execution::seq, upper_bound, output); +} + +template +inline decltype(auto) prime_range(ExecutionPolicy&& policy, Integer lower_bound, Integer upper_bound, T output) +{ + if constexpr (detail::is_container>::value) + { + detail::prime_range_impl(policy, lower_bound, upper_bound, *output); + return; + } + + else + { + detail::prime_range_wrapper(policy, lower_bound, upper_bound, output); + return output; + } +} + +template +inline decltype(auto) prime_range(Integer lower_bound, Integer upper_bound, T output) +{ + return prime_range(std::execution::seq, lower_bound, upper_bound, output); +} +} + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP diff --git a/include/boost/math/special_functions/prime_sieve_iter.hpp b/include/boost/math/special_functions/prime_sieve_iter.hpp new file mode 100644 index 0000000000..9b3a1691f6 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve_iter.hpp @@ -0,0 +1,150 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::detail::prime_sieve +{ +inline std::size_t L1D_SIZE {32'768}; + +template +decltype(auto) sequential_segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const Integer interval {static_cast(L1D_SIZE * 8)}; + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + interval}; + + if (current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + + std::size_t ranges {static_cast((upper_bound - lower_bound) / interval)}; + + IntervalSieve sieve(current_lower_bound, current_upper_bound, resultant_primes); + + for(std::size_t i {}; i < ranges; ++i) + { + current_lower_bound = current_upper_bound; + current_upper_bound += interval; + if(current_upper_bound > upper_bound) + { + current_upper_bound = upper_bound; + } + resultant_primes = sieve.NewRange(current_lower_bound, current_upper_bound); + } + + return resultant_primes; +} + +template +decltype(auto) segmented_sieve(const Integer lower_bound, const Integer upper_bound, OutputIterator resultant_primes) +{ + const auto num_threads {std::thread::hardware_concurrency() > 0 ? std::thread::hardware_concurrency() : 2u}; + const Integer thread_range {(upper_bound - lower_bound) / static_cast(num_threads)}; + + std::vector> prime_vectors(num_threads); + std::vector> future_manager; + future_manager.reserve(num_threads); + + Integer current_lower_bound {lower_bound}; + Integer current_upper_bound {current_lower_bound + thread_range}; + + for(std::size_t i {}; i < num_threads - 1; ++i) + { + prime_vectors[i].resize(static_cast(prime_approximation(current_lower_bound, current_upper_bound))); + + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, current_upper_bound, &prime_vectors, i]{ + sequential_segmented_sieve(current_lower_bound, current_upper_bound, prime_vectors[i].begin()); + })); + + current_lower_bound = current_upper_bound; + current_upper_bound += thread_range; + } + + prime_vectors.back().resize(static_cast(prime_approximation(current_lower_bound, upper_bound))); + future_manager.emplace_back(std::async(std::launch::async, [current_lower_bound, upper_bound, &prime_vectors]{ + sequential_segmented_sieve(current_lower_bound, upper_bound, prime_vectors.back().begin()); + })); + + std::size_t i {}; + for(auto&& future : future_manager) + { + future.get(); // Blocks to maintain proper sorting + + for(auto& val : prime_vectors[i]) + { + *resultant_primes++ = std::move(val); + } + + ++i; + } + + return resultant_primes; +} +} + +namespace boost::math +{ +template +decltype(auto) prime_sieve_iter(ExecutionPolicy&& policy, const Integer upper_bound, OutputIterator resultant_primes) +{ + if (upper_bound == 2) + { + return resultant_primes; + } + + else if (upper_bound <= detail::prime_sieve::linear_sieve_limit) + { + detail::prime_sieve::linear_sieve(upper_bound, resultant_primes); + return resultant_primes; + } + + resultant_primes = detail::prime_sieve::linear_sieve(detail::prime_sieve::linear_sieve_limit, resultant_primes); + + if constexpr (std::is_same_v, decltype(std::execution::seq)> + #if __cpp_lib_execution > 201900 + || std::is_same_v, decltype(std::execution::unseq)> + #endif + ) + { + resultant_primes = detail::prime_sieve::sequential_segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); + } + + else + { + resultant_primes = detail::prime_sieve::segmented_sieve(detail::prime_sieve::linear_sieve_limit, upper_bound, resultant_primes); + } + + return resultant_primes; +} + +template +inline decltype(auto) prime_sieve_iter(const Integer upper_bound, OutputIterator resultant_primes) +{ + return prime_sieve_iter(std::execution::seq, upper_bound, resultant_primes); +} + +template +inline void set_l1d_size(const Integer size) +{ + detail::prime_sieve::L1D_SIZE = static_cast(size); +} +} + +#endif // BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SEIVE_ITER_HPP diff --git a/include/boost/math/special_functions/prime_sieve_jm.hpp b/include/boost/math/special_functions/prime_sieve_jm.hpp new file mode 100644 index 0000000000..7b5232f5a2 --- /dev/null +++ b/include/boost/math/special_functions/prime_sieve_jm.hpp @@ -0,0 +1,534 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_JM_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_JM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace jm { namespace detail +{ +// https://mathworld.wolfram.com/SieveofEratosthenes.html +template +struct simple_bitset +{ + std::unique_ptr bits; + std::size_t m_size; + simple_bitset(std::size_t n) : bits(new I[n / (sizeof(I) * CHAR_BIT) + (n % (sizeof(I) * CHAR_BIT) ? 1 : 0)]), m_size(n) + { + std::memset(bits.get(), 0xff, n / CHAR_BIT + (n % CHAR_BIT ? 1 : 0)); + } + static constexpr std::size_t ln2(std::size_t n) + { + return n <= 1 ? 0 : 1 + ln2(n >> 1); + } + I test(std::size_t n)const + { + constexpr I masks[] = { static_cast(1uLL), static_cast(2uLL), static_cast(4uLL), static_cast(8uLL), static_cast(16uLL), static_cast(32uLL), static_cast(64uLL), static_cast(128uLL), static_cast(256uLL), + static_cast(1uLL << 9), static_cast(1uLL << 10), static_cast(1uLL << 11), static_cast(1uLL << 12), static_cast(1uLL << 13), static_cast(1uLL << 14), static_cast(1uLL << 15), static_cast(1uLL << 16), + static_cast(1uLL << 17), static_cast(1uLL << 18), static_cast(1uLL << 19), static_cast(1uLL << 20), static_cast(1uLL << 21), static_cast(1uLL << 22), static_cast(1uLL << 23), static_cast(1uLL << 24), + static_cast(1uLL << 25), static_cast(1uLL << 26), static_cast(1uLL << 27), static_cast(1uLL << 28), static_cast(1uLL << 29), static_cast(1uLL << 30), static_cast(1uLL << 31), static_cast(1uLL << 32), + static_cast(1uLL << 33), static_cast(1uLL << 34), static_cast(1uLL << 35), static_cast(1uLL << 36), static_cast(1uLL << 37), static_cast(1uLL << 38), static_cast(1uLL << 39), static_cast(1uLL << 40), + static_cast(1uLL << 41), static_cast(1uLL << 42), static_cast(1uLL << 43), static_cast(1uLL << 44), static_cast(1uLL << 45), static_cast(1uLL << 46), static_cast(1uLL << 47), static_cast(1uLL << 48), + static_cast(1uLL << 49), static_cast(1uLL << 50), static_cast(1uLL << 51), static_cast(1uLL << 52), static_cast(1uLL << 53), static_cast(1uLL << 54), static_cast(1uLL << 55), static_cast(1uLL << 56), + static_cast(1uLL << 57), static_cast(1uLL << 58), static_cast(1uLL << 59), static_cast(1uLL << 60), static_cast(1uLL << 61), static_cast(1uLL << 62), static_cast(1uLL << 63), + }; + constexpr I mask = (sizeof(I) * CHAR_BIT) - 1; + constexpr std::size_t shift = ln2(sizeof(I) * CHAR_BIT); + //BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + return bits[n >> shift] & masks[n & mask]; + } + void clear(std::size_t n) + { + constexpr I masks[] = { static_cast(~1uLL), static_cast(~2uLL), static_cast(~4uLL), static_cast(~8uLL), static_cast(~16uLL), static_cast(~32uLL), static_cast(~64uLL), static_cast(~128uLL), static_cast(~256uLL), + static_cast(~(1uLL << 9)), static_cast(~(1uLL << 10)), static_cast(~(1uLL << 11)), static_cast(~(1uLL << 12)), static_cast(~(1uLL << 13)), static_cast(~(1uLL << 14)), static_cast(~(1uLL << 15)), static_cast(~(1uLL << 16)), + static_cast(~(1uLL << 17)), static_cast(~(1uLL << 18)), static_cast(~(1uLL << 19)), static_cast(~(1uLL << 20)), static_cast(~(1uLL << 21)), static_cast(~(1uLL << 22)), static_cast(~(1uLL << 23)), static_cast(~(1uLL << 24)), + static_cast(~(1uLL << 25)), static_cast(~(1uLL << 26)), static_cast(~(1uLL << 27)), static_cast(~(1uLL << 28)), static_cast(~(1uLL << 29)), static_cast(~(1uLL << 30)), static_cast(~(1uLL << 31)), static_cast(~(1uLL << 32)), + static_cast(~(1uLL << 33)), static_cast(~(1uLL << 34)), static_cast(~(1uLL << 35)), static_cast(~(1uLL << 36)), static_cast(~(1uLL << 37)), static_cast(~(1uLL << 38)), static_cast(~(1uLL << 39)), static_cast(~(1uLL << 40)), + static_cast(~(1uLL << 41)), static_cast(~(1uLL << 42)), static_cast(~(1uLL << 43)), static_cast(~(1uLL << 44)), static_cast(~(1uLL << 45)), static_cast(~(1uLL << 46)), static_cast(~(1uLL << 47)), static_cast(~(1uLL << 48)), + static_cast(~(1uLL << 49)), static_cast(~(1uLL << 50)), static_cast(~(1uLL << 51)), static_cast(~(1uLL << 52)), static_cast(~(1uLL << 53)), static_cast(~(1uLL << 54)), static_cast(~(1uLL << 55)), static_cast(~(1uLL << 56)), + static_cast(~(1uLL << 57)), static_cast(~(1uLL << 58)), static_cast(~(1uLL << 59)), static_cast(~(1uLL << 60)), static_cast(~(1uLL << 61)), static_cast(~(1uLL << 62)), static_cast(~(1uLL << 63)), + }; + constexpr I mask = (sizeof(I) * CHAR_BIT) - 1; + constexpr std::size_t shift = ln2(sizeof(I) * CHAR_BIT); + //BOOST_ASSERT((n >> shift) < (m_size / (sizeof(I) * CHAR_BIT) + (m_size % (sizeof(I) * CHAR_BIT) ? 1 : 0))); + bits[n >> shift] &= masks[n & mask]; + } + std::size_t size()const { return m_size; } + void reset(){ std::memset(bits.get(), 0xff, m_size / CHAR_BIT + (m_size % CHAR_BIT ? 1 : 0)); } +}; + +template <> +inline std::uint64_t simple_bitset::test(std::size_t n)const +{ + constexpr std::uint64_t mask = 63; + constexpr std::size_t shift = 6; + return _bittest64(reinterpret_cast(bits.get()) + (n >> shift), n & mask); +} +template <> +inline void simple_bitset::clear(std::size_t n) +{ + constexpr std::uint64_t mask = 63; + constexpr std::size_t shift = 6; + _bittestandreset64(reinterpret_cast(bits.get()) + (n >> shift), n & mask); +} + +struct dynamic_bitset_wrapper +{ + boost::dynamic_bitset<> data; + + bool test(std::size_t n)const + { + return data.test(n); + } + void clear(std::size_t n) + { + data.set(n, false); + } + + dynamic_bitset_wrapper(std::size_t n) + { + data.resize(n, true); + } + std::size_t size()const { return data.size(); } + void reset() + { + data.set(0, data.size(), true); + } +}; + +typedef simple_bitset bitmask_type; +//typedef dynamic_bitset_wrapper bitmask_type; + +template +constexpr bool has_output_iterator_terminated(const T&) +{ + return false; +} + +template +struct dual_output_iterator +{ + OutputIterator out; + Container& container; + + dual_output_iterator(OutputIterator o, Container& c) : out(o), container(c) {} + + struct proxy + { + dual_output_iterator& ref; + proxy(dual_output_iterator& o) : ref(o) {} + ~proxy() { ++ref.out; } + dual_output_iterator& operator*() { return ref; } + }; + dual_output_iterator& operator++() { ++out; return *this; } + proxy operator++(int) { return proxy(*this); } + dual_output_iterator& operator*() { return *this; } + template + dual_output_iterator& operator=(const Value& val) + { + *out = val; + container.push_back(val); + return *this; + } +}; + +template +inline bool linear_sieve_classical(Sieve& masks, OutputIterator out) +{ + Integer max_sqr = Integer(1) << (std::numeric_limits::digits / 2 - 1); + *out++ = 2u; + for (Integer index = 1; index < masks.size(); ++index) + { + if (masks.test(index)) + { + *out++ = 2 * index + 1; + if (has_output_iterator_terminated(out)) + return false; + if(index < max_sqr) + for (Integer clear = index * (2 * index + 2); clear < masks.size(); clear += index * 2 + 1) + masks.clear(clear); + } + } + return true; +} + +template +inline bool linear_sieve_classical_segment(Container& primes, Sieve& masks, Integer start_offset, Integer max_points, OutputIterator out, bool output_to_container) +{ + masks.reset(); + Integer limit = static_cast(std::sqrt(static_cast(start_offset + max_points)) + 1); + // Begin by striking out odd numbered multiples of all the primes we have so far. + // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < primes.size(); ++i) + { + Integer prime = primes[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + // + // Now walk through the sieve outputting primes. + // + for (Integer index = 0; index < max_points / 2; ++index) + { + if (masks.test(index)) + { + *out++ = start_offset + 2 * index + 1; + if (output_to_container) + primes.push_back(start_offset + 2 * index + 1); + if (has_output_iterator_terminated(out)) + return false; + } + } + return true; +} + + +// 4096 is where benchmarked performance of linear_sieve begins to diverge +template +const Integer linear_sieve_limit = Integer(524288); // Constexpr does not work with boost::multiprecision types + +template +inline bool linear_sieve_classical_segment_threaded(std::atomic* current_max_processed_value, std::mutex* lock, Container* primes, Integer start_offset, Integer end_offset, Integer stride, OutputIterator out, bool output_to_container) +{ + bitmask_type masks(linear_sieve_limit / 2); + + std::unique_lock l(*lock); + std::size_t prime_count = primes->size(); + l.unlock(); + + while (start_offset < end_offset) + { + Integer max_points = end_offset - start_offset > linear_sieve_limit ? linear_sieve_limit : end_offset - start_offset; + Integer limit = static_cast(std::sqrt(static_cast(start_offset + max_points)) + 1); + // Begin by striking out odd numbered multiples of all the primes we have so far. + // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < prime_count; ++i) + { + Integer prime = (*primes)[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + // + // Now we must wait until the previous thread has completed the segment before this one: + // + while (current_max_processed_value->load() != start_offset) + std::this_thread::yield(); + // + // Maybe process all the primes we didn't have access to in the loop above: + // + if ((*primes)[prime_count - 1] < limit) + { + l.lock(); + prime_count = primes->size(); + l.unlock(); + // Begin by striking out odd numbered multiples of all the primes we have so far. + // 1-based index, we only have odd numbers in the sieve so don't need to handle 2: + for (std::size_t i = 1; i < prime_count; ++i) + { + Integer prime = (*primes)[i]; + if (prime > limit) + break; + Integer index = prime - start_offset % prime; + if ((index & 1) == 0) + index += prime; + index >>= 1; + for (; index < max_points / 2; index += prime) + masks.clear(index); + } + } + // + // Now walk through the sieve outputting primes. + // + l.lock(); + for (Integer index = 0; index < max_points / 2; ++index) + { + if (masks.test(index)) + { + *out++ = start_offset + 2 * index + 1; + if (output_to_container) + primes->push_back(start_offset + 2 * index + 1); + if (has_output_iterator_terminated(out)) + return false; + } + } + prime_count = primes->size(); + l.unlock(); + // + // We're done on this segment, signal the next thread: + // + masks.reset(); + *current_max_processed_value = start_offset + max_points; + start_offset += stride; + } + return true; +} + +template +void prime_sieve_imp(ExecutionPolicy&& policy, Integer upper_bound, Container& primes, OutputIterator out, bool output_to_container) +{ + if (upper_bound <= 2) + { + return; + } + + bitmask_type sieve((upper_bound <= linear_sieve_limit ? upper_bound : linear_sieve_limit) / 2); + + if (output_to_container && (upper_bound > linear_sieve_limit)) + { + if (!jm::detail::linear_sieve_classical(sieve, dual_output_iterator(out, primes))) + return; + } + else + { + if (!jm::detail::linear_sieve_classical(sieve, out)) + return; + } + + if (upper_bound <= linear_sieve_limit) + return; + + if constexpr (std::is_same_v>, std::execution::sequenced_policy> +#if __cpp_lib_execution > 201900 + || std::is_same_v>, std::execution::unsequenced_policy> +#endif + ) + { + for (Integer offset = linear_sieve_limit; offset < upper_bound; offset += linear_sieve_limit) + { + if (!linear_sieve_classical_segment(primes, sieve, offset, (std::min)(linear_sieve_limit, upper_bound - offset), out, output_to_container)) + return; + } + } + else + { + unsigned hardware_concurrency = std::thread::hardware_concurrency(); + if ((hardware_concurrency == 0) || (upper_bound <= linear_sieve_limit * 2)) + { + // + // No point in using threads as there's only one more segment to process: + // + linear_sieve_classical_segment(primes, sieve, linear_sieve_limit, upper_bound - linear_sieve_limit, out, output_to_container); + } + else + { + unsigned n_threads = (upper_bound - linear_sieve_limit) / linear_sieve_limit +((upper_bound - linear_sieve_limit) % linear_sieve_limit ? 1 : 0); + n_threads = (std::min)(n_threads, hardware_concurrency / 2); + + std::atomic current_max_processed_value = linear_sieve_limit; + std::vector> threads(n_threads); + std::mutex mutex; + + for (unsigned i = 0; i < n_threads; ++i) + threads[i].reset(new std::thread(linear_sieve_classical_segment_threaded, + ¤t_max_processed_value, &mutex, &primes, linear_sieve_limit * (i + 1), upper_bound, + linear_sieve_limit * n_threads, out, output_to_container)); + + for (unsigned i = 0; i < n_threads; ++i) + threads[i]->join(); + } + } +} + +template +struct counted_output_iterator +{ + std::shared_ptr target; + OutputIterator out; + counted_output_iterator(std::size_t n, OutputIterator o) : target(new std::size_t()), out(o) + { + *target = n; + } + + struct counted_output_iterator_proxy + { + counted_output_iterator& out; + counted_output_iterator_proxy(counted_output_iterator& o) : out(o) {} + counted_output_iterator& operator*()const { return out; } + }; + + counted_output_iterator_proxy operator++(int){ return counted_output_iterator_proxy(*this); } + counted_output_iterator& operator++() { return *this; } + counted_output_iterator& operator*() { return *this; } + + template + counted_output_iterator& operator=(const T& value) + { + *out++ = value; + --*target; + return *this; + } +}; + +template +constexpr bool has_output_iterator_terminated(const counted_output_iterator& out) +{ + return *(out.target) == 0; +} + +template +struct prime_factors_output_iterator +{ + Integer number; + Container& out; + + prime_factors_output_iterator(Integer n, Container& o) : number(n), out(o) {} + + prime_factors_output_iterator& operator = (Integer prime) + { + std::size_t count = 0; + while (number % prime == 0) + { + ++count; + number /= prime; + out.push_back(std::make_pair(prime, count)); + } + return *this; + } + prime_factors_output_iterator& operator*() { return *this; } + prime_factors_output_iterator& operator++() { return *this; } + prime_factors_output_iterator& operator++(int) { return *this; } + + bool complete()const { return number == 1; } +}; + +template +constexpr bool has_output_iterator_terminated(const prime_factors_output_iterator& out) +{ + return out.complete(); +} + + +} // End namespace detail + +template +void prime_reserve(Integer upper_bound, Container &prime_container) +{ + typedef typename Container::size_type size_type; + // + // How big does the container have to be to hold all the primes in the range [2, upper_bound]? + // + // The prime number theorem (https://en.wikipedia.org/wiki/Prime_number_theorem) gives an asymptotic + // estimate of upper_bound / log(upper_bound), but this only really holds as upper_bound -> infinity. + // + // Non asymptotic limits follow: + // + double limit; + if (upper_bound >= 60184) + // Dusart, Pierre (2010). "Estimates of Some Functions Over Primes without R.H" + limit = upper_bound / (std::log(static_cast(upper_bound)) - 1.1); + else + { + // There are other loose relations available, but it's easy enough to search + // all x in [3, 60184] and note how many primes there are and by how much + // it differs from x / log(x). The fudge factor of 1.25 is sufficient + // always provide enough space: + // + limit = 1.25 * upper_bound / std::log(static_cast(upper_bound)); + } + prime_container.reserve(static_cast(limit)); +} + +template +inline std::enable_if_t::value> prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, Container &primes) +{ + typedef typename Container::value_type integer_type; + prime_reserve(upper_bound, primes); + return detail::prime_sieve_imp(policy, static_cast(upper_bound), primes, std::back_inserter(primes), false); +} + +template +inline std::enable_if_t::value> prime_sieve(ExecutionPolicy&& policy, Integer upper_bound, OutputIterator out) +{ + std::vector primes; + if (upper_bound > detail::linear_sieve_limit) + prime_reserve(upper_bound, primes); + return detail::prime_sieve_imp(policy, upper_bound, primes, out, true); +} + +template +inline std::enable_if_t::value> prime_sieve(Integer upper_bound, Container &primes) +{ + prime_sieve(std::execution::seq, upper_bound, primes); +} +template +inline std::enable_if_t::value> prime_sieve(Integer upper_bound, OutputIterator primes) +{ + prime_sieve(std::execution::seq, upper_bound, primes); +} + +template +inline std::enable_if_t::value> prime_sieve_n(ExecutionPolicy&& policy, Integer n, Container &primes) +{ + typedef typename Container::value_type integer_type; + prime_reserve(n, primes); + return detail::prime_sieve_imp(policy, (std::numeric_limits::max)(), primes, detail::counted_output_iterator >(n, std::back_inserter(primes)), false); +} + +template +inline std::enable_if_t::value> prime_sieve_n(ExecutionPolicy&& policy, Integer n, OutputIterator out) +{ + std::vector primes; + return detail::prime_sieve_imp(policy, (std::numeric_limits::max)(), primes, detail::counted_output_iterator(n, out), true); +} + +template +inline std::enable_if_t::value> prime_sieve_n(Integer upper_bound, Container &primes) +{ + prime_sieve_n(std::execution::seq, upper_bound, primes); +} +template +inline std::enable_if_t::value> prime_sieve_n(Integer upper_bound, OutputIterator primes) +{ + prime_sieve_n(std::execution::seq, upper_bound, primes); +} + +template +inline std::vector > factorize(ExecutionPolicy&& policy, Integer n) +{ + std::vector primes; + std::vector > result; + detail::prime_sieve_imp(policy, n / 2, primes, detail::prime_factors_output_iterator(n, result), true); + return result; +} +template +inline std::vector > factorize(Integer n) +{ + return factorize(std::execution::seq, n); +} + +} // namespace boost::math + + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_SIEVE_HPP diff --git a/include/boost/math/special_functions/prime_wheel.hpp b/include/boost/math/special_functions/prime_wheel.hpp new file mode 100644 index 0000000000..0999eac5cf --- /dev/null +++ b/include/boost/math/special_functions/prime_wheel.hpp @@ -0,0 +1,293 @@ +// Copyright 2020 Matt Borland and Jonathan Sorenson +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP +#define BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP + +#include +#include +#include +#include +#include + +namespace boost::math::detail +{ +template +class Wheel +{ +private: + struct Wheelrec + { + std::int_fast32_t rp; + std::int_fast32_t dist; + std::int_fast32_t pos; + std::int_fast32_t inv; + }; + + std::unique_ptr W_; + Integer M_; + Integer k_; + Integer phi_; + + static constexpr std::array P_ {2, 3, 5, 7, 11, 13, 17, 19}; + + void build(Integer korsize); + +public: + Wheel() : W_{nullptr}, M_{0}, k_{0}, phi_{0} {}; + explicit Wheel(Integer korsize) { build(korsize); } + explicit Wheel(const Wheel &x) { build(x.K()); } + + constexpr bool operator!() const noexcept { return W_ == nullptr; } + constexpr const Wheelrec& operator[](const Integer i) const noexcept { return W_[i % M_]; } + const Wheel& operator=(const Wheel &x) + { + if(this != &x) + { + build(x.K()); + } + return *this; + } + + constexpr Integer Size() const noexcept { return M_; } + constexpr Integer K() const noexcept { return k_; } + constexpr Integer Phi() const noexcept { return phi_; } + + constexpr Integer Next(const Integer i) const noexcept { return i + W_[i % M_].dist; } + constexpr Integer MakeRP(const Integer i) const noexcept + { + if(W_[i % M_].rp) + { + return i; + } + return Next(i); + } + constexpr Integer Prev(const Integer i) const noexcept { return i - W_[(M_ - (i % M_)) % M_].dist; } + constexpr Integer Pos(const Integer i) const noexcept { return phi_ * (i / M_) + W_[i % M_].pos; } + constexpr Integer Inv(const Integer i) const noexcept { return M_ * (i / phi_) + W_[i % phi_].inv; } + + void Print(); +}; + +template +void Wheel::build(Integer korsize) +{ + // Calculate k_ and M_ + if(korsize >= 10) + { + --korsize; + for(k_ = 0; korsize > 0; ++k_) + { + korsize /= P_[k_]; + } + } + else + { + k_ = korsize; + } + + Integer i {0}; + Integer dist {0}; + Integer pos {1}; + + for(M_ = 1; i < k_; ++i) + { + M_ *= P_[i]; + } + + W_ = std::make_unique(M_); + + // Compute the RP field + for(i = 0; i < M_; ++i) + { + W_[i].rp = 1; + } + + for(i = 0; i < k_; ++i) + { + for(Integer j {0}; j < M_; j += P_[i]) + { + W_[j].rp = 0; + } + } + + // Compute the dist field + W_[M_- 1].dist = 2; + for(i = M_ - 2; i >= 0; --i) + { + W_[i].dist = ++dist; + if(W_[i].rp) + { + dist = 0; + } + } + + // Copute pos and inv fields + for(i = 0; i < M_; ++i) + { + W_[i].inv = 0; + if(W_[i].rp) + { + W_[pos].inv = i; + W_[i].pos = pos++; + } + else + { + W_[i].pos = 0; + } + + } + + W_[0].inv = -1; + phi_ = W_[M_- 1].pos; +} + +template +void Wheel::Print() +{ + std::int_fast32_t i {}; + std::cout << "Wheel size = " << this->Size() + << "\nk = " << this->K() + << "\nphi(M) = " << this->Phi() << std::endl; + + // Verify size + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << i << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nRP Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].rp << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nDist Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].dist << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nPos Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(3) << W_[i].pos << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + + std::cout << "\n\nInv Field\n"; + for(i = 0; i < this->Size(); ++i) + { + std::cout << std::setw(4) << W_[i].inv << ','; + if(i % 25 == 24) + { + std::cout << std::endl; + } + } + std::cout << std::endl; +} + +// Pre-computed MOD 210 wheel +template +class MOD210Wheel final +{ +private: + static constexpr auto M_ {210}; + static constexpr auto k_ {4}; + static constexpr auto phi_ {28}; + + static constexpr std::array rp_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 + }; + + static constexpr std::array inv_ + { + -1, 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, + 107, 109, 113, 121, 127, 131, 137, 139, 143, 149, 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + static constexpr std::array pos_ + { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 5, 0, 0, 0, 6, 0, + 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, 0, 0, 9, 0, 0, 0, 10, 0, 11, 0, 0, 0, 12, 0, 0, + 0, 0, 0, 13, 0, 0, 0, 0, 0, 14, 0, 15, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 20, 0, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 23, 0, 24, 0, 0, 0, 25, 0, 26, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 30, 0, 0, 0, 0, 0, 31, 0, 32, 0, 0, 0, 33, 0, 0, 0, 0, 0, 34, + 0, 35, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 0, 0, 38, 0, 39, 0, 0, 0, 40, 0, + 0, 0, 0, 0, 41, 0, 42, 0, 0, 0, 0, 0, 43, 0, 0, 0, 44, 0, 45, 0, 0, 0, 46, 0, 47, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48 + }; + + static constexpr std::array dist_ + { + 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, + 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 6, 5, + 4, 3, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 4, 3, 2, + 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, + 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2, + 1, 6, 5, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, + 4, 3, 2, 1, 2, 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 2 + }; + +public: + constexpr MOD210Wheel() = default; + ~MOD210Wheel() = default; + + constexpr auto Size() const noexcept { return M_; } + constexpr auto K() const noexcept { return k_; } + constexpr auto Phi() const noexcept { return phi_; } + + constexpr auto Next(const Integer i) const noexcept { return i + dist_[static_cast(i % M_)]; } + constexpr auto MakeRP(const Integer i) const noexcept + { + if(rp_[i % M_]) + { + return i; + } + return Next(i); + } + constexpr auto Prev(const Integer i) const noexcept { return i - dist_[static_cast((M_ - (i % M_)) % M_)]; } + constexpr auto Pos(const Integer i) const noexcept { return phi_ * (i / M_) + pos_[static_cast(i % M_)]; } + constexpr auto Inv(const Integer i) const noexcept { return M_ * (i / phi_) + inv_[static_cast(i % phi_)]; } +}; +} + +#endif //BOOST_MATH_SPECIAL_FUNCTIONS_PRIME_WHEEL_HPP diff --git a/reporting/performance/prime_sieve_performance.cpp b/reporting/performance/prime_sieve_performance.cpp new file mode 100644 index 0000000000..e9a49c1bd3 --- /dev/null +++ b/reporting/performance/prime_sieve_performance.cpp @@ -0,0 +1,202 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +// Individual Algos +template +inline auto linear_sieve_helper(Integer upper_bound, std::vector primes) -> std::vector +{ + boost::math::detail::linear_sieve(upper_bound, primes); + return primes; +} + +template +void linear_sieve(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(linear_sieve_helper(upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +void linear_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes(boost::math::prime_approximation(upper)); + + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + +template +void stepanov_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)) / 2 - 1; + std::vector primes(upper, 1); + + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::stepanov_sieve(upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + +template +inline auto mask_sieve_helper(Integer lower_bound, Integer upper_bound, std::vector primes) -> std::vector +{ + boost::math::detail::mask_sieve(lower_bound, upper_bound, primes); + return primes; +} + +template +void mask_sieve(benchmark::State& state) +{ + Integer lower = static_cast(2); + Integer upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(mask_sieve_helper(lower, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +inline auto interval_sieve_helper(Integer lower_bound, Integer upper_bound, std::vector primes) -> std::vector +{ + std::vector pre_sieved_primes {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71}; + boost::math::detail::IntervalSieve sieve(lower_bound, upper_bound, pre_sieved_primes, primes); + return primes; +} + +template +void interval_sieve(benchmark::State& state) +{ + Integer lower = static_cast(2); + Integer upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(interval_sieve_helper(lower, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +// Complete Implementations +template +inline auto prime_sieve_helper(ExecuitionPolicy policy, Integer upper, Container primes) +{ + boost::math::prime_sieve(policy, upper, &primes); + return primes; +} + +template +void prime_sieve(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + for(auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(prime_sieve_helper(std::execution::par, upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +inline decltype(auto) prime_sieve_oi_helper(ExecutionPolicy policy, Integer upper, OutputIterator resultant_primes) +{ + return boost::math::prime_sieve_iter(policy, upper, resultant_primes); +} + +template +void prime_sieve_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes(boost::math::prime_approximation(upper)); + + for(auto _ : state) + { + benchmark::DoNotOptimize(prime_sieve_oi_helper(std::execution::par, upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + +template +void prime_sieve_wrapper(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes(boost::math::prime_approximation(upper)); + + for(auto _ : state) + { + benchmark::DoNotOptimize(boost::math::prime_sieve(std::execution::par, upper, primes.begin())); + } + state.SetComplexityN(state.range(0)); +} + +template +inline auto kimwalish_primes_helper(Integer upper, std::vector primes) -> std::vector +{ + primesieve::generate_primes(upper, &primes); + return primes; +} + +template +void kimwalish_primes(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + for (auto _ : state) + { + std::vector primes; + benchmark::DoNotOptimize(kimwalish_primes_helper(upper, primes)); + } + state.SetComplexityN(state.range(0)); +} + +// Invidiual Implementations +// Linear +//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); + +// Linear output iterator +//BENCHMARK_TEMPLATE(linear_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(stepanov_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN)->UseRealTime(); + +// Segmented +//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); + +// Complete Implemenations +//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_wrapper, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_oi, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); // Benchmark +//BENCHMARK_TEMPLATE(prime_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); + +BENCHMARK_MAIN(); diff --git a/reporting/performance/prime_sieve_performance_jm.cpp b/reporting/performance/prime_sieve_performance_jm.cpp new file mode 100644 index 0000000000..6fcee3b6c8 --- /dev/null +++ b/reporting/performance/prime_sieve_performance_jm.cpp @@ -0,0 +1,199 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +template +void linear_sieve(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::detail::linear_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} + +template +void linear_sieve_jm_helper(Integer upper, std::vector& primes) +{ + jm::detail::simple_bitset masks((upper + 3) / 2); + jm::detail::linear_sieve_classical(masks, std::back_inserter(primes)); +} + +template +void linear_sieve_jm(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + linear_sieve_jm_helper(upper, primes); + } + state.SetComplexityN(state.range(0)); +} + +// Complete Implementations +template +void prime_sieve_seq(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::prime_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_par(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + boost::math::prime_sieve(std::execution::par, upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_seq_jm(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_seq_jm_par(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(std::execution::par, upper, primes); + } + state.SetComplexityN(state.range(0)); +} +template +void prime_sieve_seq_jm_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for(auto _ : state) + { + primes.clear(); + jm::prime_sieve(upper, std::back_inserter(primes)); + } + state.SetComplexityN(state.range(0)); +} + +template +void kimwalish_primes(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + for (auto _ : state) + { + primes.clear(); + primesieve::generate_primes(upper, &primes); + } + state.SetComplexityN(state.range(0)); +} + +template +inline Integer kimwalish_prime_factorizer_helper(Integer upper, I2 value) +{ + std::vector primes; + primesieve::generate_primes(upper, &primes); + for (unsigned i = 0; i < primes.size(); ++i) + while (value % primes[i] == 0) + value /= primes[i]; + return value; +} + +template +void kimwalish_prime_factorizer(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + for (auto _ : state) + { + benchmark::DoNotOptimize(kimwalish_prime_factorizer_helper(upper, std::numeric_limits::max())); + } + state.SetComplexityN(state.range(0)); +} + +template +void prime_sieve_seq_oi(benchmark::State& state) +{ + Integer upper = static_cast(state.range(0)); + std::vector primes; + boost::math::prime_reserve(upper, primes); + for (auto _ : state) + { + benchmark::DoNotOptimize(boost::math::detail::prime_sieve::linear_sieve(upper, std::back_inserter(primes))); + } + state.SetComplexityN(state.range(0)); +} + +constexpr uint64_t low_range = 4; +constexpr uint64_t high_range = uint64_t(1) << 32; + +// Invidiual Implementations +// Linear +//BENCHMARK_TEMPLATE(linear_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 16)->Complexity(benchmark::oN); + +//BENCHMARK_TEMPLATE(linear_sieve, uint32_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(linear_sieve_jm, uint32_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); + +// Segmented +//BENCHMARK_TEMPLATE(mask_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(mask_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(benchmark::oNLogN); +//BENCHMARK_TEMPLATE(interval_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 14, 2 << 26)->Complexity(); +//BENCHMARK_TEMPLATE(mask_sieve, uint32_t)->RangeMultiplier(2)->Range(1 << 2, 2 << 22)->Complexity(benchmark::oNLogN); + +// Complete Implemenations +//BENCHMARK_TEMPLATE(prime_sieve, int32_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, int64_t)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(kimwalish_primes, int64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_seq, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_par, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve_seq_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +BENCHMARK_TEMPLATE(prime_sieve_seq_jm, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +BENCHMARK_TEMPLATE(prime_sieve_seq_jm_par, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve_seq_jm_oi, uint64_t)->RangeMultiplier(2)->Range(low_range, high_range)->Complexity(benchmark::oN); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::cpp_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); +//BENCHMARK_TEMPLATE(prime_sieve, boost::multiprecision::mpz_int)->RangeMultiplier(2)->Range(1 << 1, 1 << 30)->Complexity(benchmark::oN)->UseRealTime(); + +BENCHMARK_MAIN(); diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 60dc9c154d..c969824b04 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -971,6 +971,7 @@ test-suite misc : [ run compile_test/catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ] [ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run test_prime_sieve.cpp ../../test/build//boost_unit_test_framework : : : -lgmp [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] diff --git a/test/test_prime_sieve.cpp b/test/test_prime_sieve.cpp new file mode 100644 index 0000000000..dedd23b100 --- /dev/null +++ b/test/test_prime_sieve.cpp @@ -0,0 +1,529 @@ +// Copyright 2020 Matt Borland +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +void test_prime_sieve() +{ + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha + + // Does the function work with a vector + boost::math::prime_sieve(std::execution::par, static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Tests for correctness + // 2 + primes.clear(); + boost::math::prime_sieve(std::execution::par, static_cast(2), &primes); + BOOST_TEST_EQ(primes.size(), 0); + + // 100 + primes.clear(); + boost::math::prime_sieve(std::execution::par, static_cast(100), &primes); + BOOST_TEST_EQ(primes.size(), 25); + + // 10'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, static_cast(10'000), &primes); + BOOST_TEST_EQ(primes.size(), 1229); + + // 100'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, static_cast(100'000), &primes); + BOOST_TEST_EQ(primes.size(), 9592); + + // 1'000'000 + primes.clear(); + boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), &primes); + BOOST_TEST_EQ(primes.size(), 78498); + + // Does the function work with a deque? + std::deque d_primes; + boost::math::prime_sieve(std::execution::par, static_cast(1'000), &d_primes); + BOOST_TEST_EQ(d_primes.size(), ref); +} + +template +void test_sequential_prime_sieve() +{ + std::vector primes; + + // 10'000 + primes.clear(); + boost::math::prime_sieve(static_cast(10'000), &primes); + BOOST_TEST_EQ(primes.size(), 1229); + + // 100'000 + primes.clear(); + boost::math::prime_sieve(static_cast(100'000), &primes); + BOOST_TEST_EQ(primes.size(), 9592); + + // 1'000'000 + primes.clear(); + boost::math::prime_sieve(static_cast(1'000'000), &primes); + BOOST_TEST_EQ(primes.size(), 78498); +} + +template +void test_sequential_prime_sieve_iter() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + boost::math::prime_sieve_iter(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + +template +void test_prime_sieve_iter() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve_iter(std::execution::par, static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + +template +void test_prime_sieve_wrapper() +{ + constexpr std::size_t array_size {100'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + // 1'000 + boost::math::prime_sieve(std::execution::par, static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + // 10'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve(std::execution::par, static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + // 100'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve(std::execution::par, static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + // 1'000'000 + std::fill(primes.begin(), primes.end(), 0); + boost::math::prime_sieve(std::execution::par, static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 78'498); +} + +template +void test_prime_range() +{ + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha + + // Does the upper and lower bound call work + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does parallel version work + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does it work with a deque? + std::deque d_primes; + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000), &d_primes); + BOOST_TEST_EQ(d_primes.size(), ref); + + // Does the lower bound change the results? + ref = 143; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(100), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Will it call the sieve for large input + ref = 78498; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(2), static_cast(1'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_prime_range_large() +{ + std::vector primes; + Integer ref; + + // Larger numbers + ref = 586'081; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(1'000'000), static_cast(10'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 5'096'876; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(10'000'000), static_cast(100'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 48'638'573; + primes.clear(); + boost::math::prime_range(std::execution::par, static_cast(100'000'000), static_cast(1'073'741'824), &primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_prime_range_seq() +{ + std::vector primes; + Integer ref {168}; // Calculated with wolfram-alpha + + // Does the upper and lower bound call work + boost::math::prime_range(static_cast(2), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does parallel version work + primes.clear(); + boost::math::prime_range(static_cast(2), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Does it work with a deque? + std::deque d_primes; + boost::math::prime_range(static_cast(2), static_cast(1'000), &d_primes); + BOOST_TEST_EQ(d_primes.size(), ref); + + // Does the lower bound change the results? + ref = 143; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(100), static_cast(1'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + // Will it call the sieve for large input + ref = 78498; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(2), static_cast(1'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_prime_range_seq_large() +{ + std::vector primes; + Integer ref; + + // Larger numbers + ref = 586'081; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(1'000'000), static_cast(10'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 5'096'876; // Calculated with wolfram-alpha + primes.clear(); + boost::math::prime_range(static_cast(10'000'000), static_cast(100'000'000), &primes); + BOOST_TEST_EQ(primes.size(), ref); + + ref = 48'638'573; + primes.clear(); + boost::math::prime_range(static_cast(100'000'000), static_cast(1'073'741'824), &primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_par_prime_sieve_large() +{ + std::vector primes; + Integer ref {54400028}; // Calculated with wolfram-alpha + + // Force the sieve into the multi-threading section and test reserve functionality + boost::math::prime_reserve(static_cast(1073741824), primes); + boost::math::prime_sieve(std::execution::par, static_cast(1073741824), &primes); + BOOST_TEST_EQ(primes.size(), ref); +} + +template +void test_interval_sieve() +{ + std::vector pre_sieved_primes {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71}; + std::vector primes; + + boost::math::detail::IntervalSieve sieve(static_cast(1'000), static_cast(10'000), pre_sieved_primes, primes); + BOOST_TEST_EQ(primes.size(), 1'061); + + primes.clear(); + sieve.NewRange(static_cast(10'000), static_cast(100'000), primes); + BOOST_TEST_EQ(primes.size(), 8'363); + + primes.clear(); + sieve.NewRange(static_cast(100'000), static_cast(1'000'000), primes); + BOOST_TEST_EQ(primes.size(), 68'906); +} + +template +void test_interval_sieve_iterator() +{ + const std::size_t array_size {70'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::IntervalSieve sieve(static_cast(1'000), static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'061); + + std::fill(primes.begin(), primes.end(), 0); + sieve.NewRange(static_cast(10'000), static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 8'363); + + std::fill(primes.begin(), primes.end(), 0); + sieve.NewRange(static_cast(100'000), static_cast(1'000'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 68'906); +} + +template +void test_linear_sieve() +{ + std::vector primes; + + boost::math::detail::linear_sieve(static_cast(1'000), primes); + BOOST_TEST_EQ(primes.size(), 168); + + primes.clear(); + boost::math::detail::linear_sieve(static_cast(10'000), primes); + BOOST_TEST_EQ(primes.size(), 1229); + + primes.clear(); + boost::math::detail::linear_sieve(static_cast(100'000), primes); + BOOST_TEST_EQ(primes.size(), 9592); +} + +template +void test_linear_sieve_iterator() +{ + constexpr std::size_t array_size {10'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::linear_sieve(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); + + std::vector primes_v; + //boost::math::prime_reserve(static_cast(100'000), primes_v); Prime reserve does not work with OI. Need to use resize. + primes_v.resize(10'000, 0); + boost::math::detail::prime_sieve::linear_sieve(static_cast(100'000), primes_v.begin()); + BOOST_TEST_EQ(array_size - std::count(primes_v.cbegin(), primes_v.cend(), 0), 9'592); + +} + +template +void test_stepanov_sieve() +{ + constexpr std::size_t array_size {500}; + std::array primes; + + boost::math::detail::prime_sieve::stepanov_sieve(static_cast(500), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 167); // Skips 2 +} + +template +void test_wheel_sieve_of_eratosthenes() +{ + constexpr std::size_t array_size {10'000}; + std::array primes; + std::fill(primes.begin(), primes.end(), 0); + + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(1'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 168); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(10'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 1'229); + + std::fill(primes.begin(), primes.end(), 0); + boost::math::detail::prime_sieve::wheel_sieve_of_eratosthenes(static_cast(100'000), primes.begin()); + BOOST_TEST_EQ(array_size - std::count(primes.cbegin(), primes.cend(), 0), 9'592); +} + +int main() +{ + // Test prime approximation for constexpr + static_assert(boost::math::prime_approximation(100) != 0, "log and/or floor is/are not constexpr"); + + // Test SFINAE + std::vector test; + auto test_ref = &test; + static_assert(boost::math::detail::is_container>::value == 1, "INOP"); + + // Individual Algorithms + test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); + test_linear_sieve(); + + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); + test_interval_sieve(); + + // Individual Algorithms with Iterators + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + test_linear_sieve_iterator(); + + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + test_interval_sieve_iterator(); + + test_stepanov_sieve(); + + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + test_wheel_sieve_of_eratosthenes(); + + // Composite + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + test_prime_sieve(); + + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + test_sequential_prime_sieve(); + + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + test_prime_range(); + + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + test_prime_range_seq(); + + // Composite Algorithms with Iterators + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + test_sequential_prime_sieve_iter(); + + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + test_prime_sieve_iter(); + + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + test_prime_sieve_wrapper(); + + // Large composite tests (Commented out for CI) + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + //test_par_prime_sieve_large(); + + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + //test_prime_range_large(); + + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + //test_prime_range_seq_large(); + + boost::math::set_l1d_size(100'000); + BOOST_ASSERT_MSG(boost::math::detail::prime_sieve::L1D_SIZE == 100'000, "L1 Size not set"); + + boost::report_errors(); +}