From 0360aa9c7533203416ae6f322be61676a205dfe7 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Sun, 13 Jun 2021 19:47:41 +0200 Subject: [PATCH 01/26] rand_distr: Add Zipf distribution --- rand_distr/src/lib.rs | 4 +- rand_distr/src/zipf.rs | 143 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 rand_distr/src/zipf.rs diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs index f44292724ab..4475c1012a9 100644 --- a/rand_distr/src/lib.rs +++ b/rand_distr/src/lib.rs @@ -75,6 +75,7 @@ //! - Misc. distributions //! - [`InverseGaussian`] distribution //! - [`NormalInverseGaussian`] distribution +//! - [`Zipf`] distribution #[cfg(feature = "alloc")] extern crate alloc; @@ -115,6 +116,7 @@ pub use self::unit_circle::UnitCircle; pub use self::unit_disc::UnitDisc; pub use self::unit_sphere::UnitSphere; pub use self::weibull::{Error as WeibullError, Weibull}; +pub use self::zipf::{Error as ZipfError, Zipf}; #[cfg(feature = "alloc")] #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] pub use rand::distributions::{WeightedError, WeightedIndex}; @@ -198,4 +200,4 @@ mod unit_sphere; mod utils; mod weibull; mod ziggurat_tables; - +mod zipf; diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs new file mode 100644 index 00000000000..4fd2270bc24 --- /dev/null +++ b/rand_distr/src/zipf.rs @@ -0,0 +1,143 @@ +// Copyright 2021 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! The Zipf distribution. + +use num_traits::Float; +use crate::{Distribution, Standard}; +use rand::Rng; +use core::fmt; + +/// Samples floating-point numbers according to the Zipf distribution. +/// +/// The Zipf distribution (also known as the zeta distribution) is a continuous +/// probability distribution that satisfies Zipf’s law: the frequency of an item +/// is inversely proportional to its rank in a frequency table. +/// +/// # Example +/// ``` +/// use rand::prelude::*; +/// use rand_distr::Zipf; +/// +/// let val: f64 = thread_rng().sample(Zipf::new(1.5).unwrap()); +/// println!("{}", val); +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct Zipf +where F: Float, Standard: Distribution +{ + a_minus_1: F, + b: F, +} + +/// Error type returned from `Zipf::new`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Error { + /// `a <= 1` or `nan`. + ATooSmall, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Error::ATooSmall => "a <= 0 or is NaN in Zipf distribution", + }) + } +} + +#[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] +impl std::error::Error for Error {} + +impl Zipf +where F: Float, Standard: Distribution +{ + /// Construct a new `Zipf` distribution with given `a` parameter. + pub fn new(a: F) -> Result, Error> { + if !(a > F::one()) { + return Err(Error::ATooSmall); + } + let a_minus_1 = a - F::one(); + let two = F::one() + F::one(); + Ok(Zipf { + a_minus_1, + b: two.powf(a_minus_1), + }) + } +} + +impl Distribution for Zipf +where F: Float, Standard: Distribution +{ + fn sample(&self, rng: &mut R) -> F { + // This is based on the numpy implementation. + loop { + let u = F::one() - rng.sample(Standard); + let v = rng.sample(Standard); + let x = u.powf(-F::one() / self.a_minus_1).floor(); + + if x < F::one() { + continue; + } + + let t = (F::one() + F::one() / x).powf(self.a_minus_1); + if v * x * (t - F::one()) / (self.b - F::one()) <= t / self.b { + return x; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic] + fn invalid() { + Zipf::new(1.).unwrap(); + } + + #[test] + #[should_panic] + fn nan() { + Zipf::new(core::f64::NAN).unwrap(); + } + + #[test] + fn sample() { + let a = 2.0; + let d = Zipf::new(a).unwrap(); + let mut rng = crate::test::rng(1); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + } + + #[test] + fn value_stability() { + fn test_samples>( + distr: D, zero: F, expected: &[F], + ) { + let mut rng = crate::test::rng(213); + let mut buf = [zero; 4]; + for x in &mut buf { + *x = rng.sample(&distr); + } + assert_eq!(buf, expected); + } + + test_samples(Zipf::new(1.5).unwrap(), 0f32, &[ + 605.0, 1.0, 8.0, 15.0, + ]); + test_samples(Zipf::new(2.0).unwrap(), 0f64, &[ + 1.0, 2.0, 4.0, 1.0, + ]); + } +} From 1e1e768f0e69cdeac448c9b0b287b3f12aae289b Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Mon, 14 Jun 2021 01:25:17 +0200 Subject: [PATCH 02/26] Update changelog --- rand_distr/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md index 27fc9a8ddd3..887852b0a91 100644 --- a/rand_distr/CHANGELOG.md +++ b/rand_distr/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased +- New `Zipf` distribution (#1136) + ## [0.4.1] - 2021-06-15 - Empirically test PDF of normal distribution (#1121) - Correctly document `no_std` support (#1100) From a57247d9a2fe61067c6be44773a75772f26953ce Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 15 Jun 2021 12:06:02 +0200 Subject: [PATCH 03/26] Zipf: Use `OpenClosed01` --- rand_distr/src/zipf.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 4fd2270bc24..46d9027dad1 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -10,7 +10,7 @@ use num_traits::Float; use crate::{Distribution, Standard}; -use rand::Rng; +use rand::{Rng, distributions::OpenClosed01}; use core::fmt; /// Samples floating-point numbers according to the Zipf distribution. @@ -29,7 +29,7 @@ use core::fmt; /// ``` #[derive(Clone, Copy, Debug)] pub struct Zipf -where F: Float, Standard: Distribution +where F: Float, Standard: Distribution, OpenClosed01: Distribution { a_minus_1: F, b: F, @@ -55,7 +55,7 @@ impl fmt::Display for Error { impl std::error::Error for Error {} impl Zipf -where F: Float, Standard: Distribution +where F: Float, Standard: Distribution, OpenClosed01: Distribution { /// Construct a new `Zipf` distribution with given `a` parameter. pub fn new(a: F) -> Result, Error> { @@ -72,12 +72,12 @@ where F: Float, Standard: Distribution } impl Distribution for Zipf -where F: Float, Standard: Distribution +where F: Float, Standard: Distribution, OpenClosed01: Distribution { fn sample(&self, rng: &mut R) -> F { // This is based on the numpy implementation. loop { - let u = F::one() - rng.sample(Standard); + let u = rng.sample(OpenClosed01); let v = rng.sample(Standard); let x = u.powf(-F::one() / self.a_minus_1).floor(); From 718e71b826a8a4e8c568a4069d5c63cb1e8c33c7 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 15 Jun 2021 19:03:57 +0200 Subject: [PATCH 04/26] Zipf: Add benchmark --- rand_distr/benches/src/distributions.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rand_distr/benches/src/distributions.rs b/rand_distr/benches/src/distributions.rs index 621f351868c..2b3ff560954 100644 --- a/rand_distr/benches/src/distributions.rs +++ b/rand_distr/benches/src/distributions.rs @@ -195,6 +195,11 @@ fn bench(c: &mut Criterion) { distr_float!(g, "poisson", f64, Poisson::new(4.0).unwrap()); } + { + let mut g = c.benchmark_group("zipf"); + distr_float!(g, "zipf", f64, Zipf::new(1.5).unwrap()); + } + { let mut g = c.benchmark_group("bernoulli"); distr!(g, "bernoulli", bool, Bernoulli::new(0.18).unwrap()); From c2ecf1babb77d3cef5258d97e8a7664a74cf54de Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 15 Jun 2021 19:06:37 +0200 Subject: [PATCH 05/26] Fix value stability tests --- rand_distr/src/zipf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 46d9027dad1..2990c2b4bec 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -134,10 +134,10 @@ mod tests { } test_samples(Zipf::new(1.5).unwrap(), 0f32, &[ - 605.0, 1.0, 8.0, 15.0, + 1.0, 2.0, 1.0, 1.0, ]); test_samples(Zipf::new(2.0).unwrap(), 0f64, &[ - 1.0, 2.0, 4.0, 1.0, + 2.0, 1.0, 1.0, 1.0, ]); } } From 6c27184f22ae1bbde728f3865315078d04592955 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 00:13:45 +0200 Subject: [PATCH 06/26] Rename `Zipf` to `Zeta` This follows the naming convention on Wikipedia. --- rand_distr/CHANGELOG.md | 2 +- rand_distr/benches/src/distributions.rs | 2 +- rand_distr/src/lib.rs | 4 +-- rand_distr/src/zipf.rs | 40 ++++++++++++------------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md index 887852b0a91..b4b09f9404e 100644 --- a/rand_distr/CHANGELOG.md +++ b/rand_distr/CHANGELOG.md @@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased -- New `Zipf` distribution (#1136) +- New `Zeta` distribution (#1136) ## [0.4.1] - 2021-06-15 - Empirically test PDF of normal distribution (#1121) diff --git a/rand_distr/benches/src/distributions.rs b/rand_distr/benches/src/distributions.rs index 2b3ff560954..98a1fca0ac7 100644 --- a/rand_distr/benches/src/distributions.rs +++ b/rand_distr/benches/src/distributions.rs @@ -197,7 +197,7 @@ fn bench(c: &mut Criterion) { { let mut g = c.benchmark_group("zipf"); - distr_float!(g, "zipf", f64, Zipf::new(1.5).unwrap()); + distr_float!(g, "zipf", f64, Zeta::new(1.5).unwrap()); } { diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs index 4475c1012a9..750dffb0e3e 100644 --- a/rand_distr/src/lib.rs +++ b/rand_distr/src/lib.rs @@ -75,7 +75,7 @@ //! - Misc. distributions //! - [`InverseGaussian`] distribution //! - [`NormalInverseGaussian`] distribution -//! - [`Zipf`] distribution +//! - [`Zeta`] distribution #[cfg(feature = "alloc")] extern crate alloc; @@ -116,7 +116,7 @@ pub use self::unit_circle::UnitCircle; pub use self::unit_disc::UnitDisc; pub use self::unit_sphere::UnitSphere; pub use self::weibull::{Error as WeibullError, Weibull}; -pub use self::zipf::{Error as ZipfError, Zipf}; +pub use self::zipf::{Error as ZetaError, Zeta}; #[cfg(feature = "alloc")] #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] pub use rand::distributions::{WeightedError, WeightedIndex}; diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 2990c2b4bec..35f5132c7f8 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -6,36 +6,36 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! The Zipf distribution. +//! The Zeta and related distributions. use num_traits::Float; use crate::{Distribution, Standard}; use rand::{Rng, distributions::OpenClosed01}; use core::fmt; -/// Samples floating-point numbers according to the Zipf distribution. +/// Samples floating-point numbers according to the zeta distribution. /// -/// The Zipf distribution (also known as the zeta distribution) is a continuous -/// probability distribution that satisfies Zipf’s law: the frequency of an item -/// is inversely proportional to its rank in a frequency table. +/// The zeta distribution is a continuous probability distribution that +/// satisfies Zipf’s law: the frequency of an item is inversely proportional to +/// its rank in a frequency table. It is a limit of the Zipf distribution. /// /// # Example /// ``` /// use rand::prelude::*; -/// use rand_distr::Zipf; +/// use rand_distr::Zeta; /// -/// let val: f64 = thread_rng().sample(Zipf::new(1.5).unwrap()); +/// let val: f64 = thread_rng().sample(Zeta::new(1.5).unwrap()); /// println!("{}", val); /// ``` #[derive(Clone, Copy, Debug)] -pub struct Zipf +pub struct Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { a_minus_1: F, b: F, } -/// Error type returned from `Zipf::new`. +/// Error type returned from `Zeta::new`. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Error { /// `a <= 1` or `nan`. @@ -45,7 +45,7 @@ pub enum Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(match self { - Error::ATooSmall => "a <= 0 or is NaN in Zipf distribution", + Error::ATooSmall => "a <= 0 or is NaN in Zeta distribution", }) } } @@ -54,24 +54,24 @@ impl fmt::Display for Error { #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl std::error::Error for Error {} -impl Zipf +impl Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { - /// Construct a new `Zipf` distribution with given `a` parameter. - pub fn new(a: F) -> Result, Error> { + /// Construct a new `Zeta` distribution with given `a` parameter. + pub fn new(a: F) -> Result, Error> { if !(a > F::one()) { return Err(Error::ATooSmall); } let a_minus_1 = a - F::one(); let two = F::one() + F::one(); - Ok(Zipf { + Ok(Zeta { a_minus_1, b: two.powf(a_minus_1), }) } } -impl Distribution for Zipf +impl Distribution for Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { fn sample(&self, rng: &mut R) -> F { @@ -100,19 +100,19 @@ mod tests { #[test] #[should_panic] fn invalid() { - Zipf::new(1.).unwrap(); + Zeta::new(1.).unwrap(); } #[test] #[should_panic] fn nan() { - Zipf::new(core::f64::NAN).unwrap(); + Zeta::new(core::f64::NAN).unwrap(); } #[test] fn sample() { let a = 2.0; - let d = Zipf::new(a).unwrap(); + let d = Zeta::new(a).unwrap(); let mut rng = crate::test::rng(1); for _ in 0..1000 { let r = d.sample(&mut rng); @@ -133,10 +133,10 @@ mod tests { assert_eq!(buf, expected); } - test_samples(Zipf::new(1.5).unwrap(), 0f32, &[ + test_samples(Zeta::new(1.5).unwrap(), 0f32, &[ 1.0, 2.0, 1.0, 1.0, ]); - test_samples(Zipf::new(2.0).unwrap(), 0f64, &[ + test_samples(Zeta::new(2.0).unwrap(), 0f64, &[ 2.0, 1.0, 1.0, 1.0, ]); } From b06c2f61af63a1ccffd56e3b9db5c195bcc58706 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 00:21:26 +0200 Subject: [PATCH 07/26] Don't claim `Zeta` follows Zipf's law It seems like this is not true [1], at least not with the same exponent. [1] https://en.wikipedia.org/wiki/Zeta_distribution --- rand_distr/src/zipf.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 35f5132c7f8..bdb8f9b1b55 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -15,9 +15,7 @@ use core::fmt; /// Samples floating-point numbers according to the zeta distribution. /// -/// The zeta distribution is a continuous probability distribution that -/// satisfies Zipf’s law: the frequency of an item is inversely proportional to -/// its rank in a frequency table. It is a limit of the Zipf distribution. +/// The zeta distribution is a limit of the Zipf distribution. /// /// # Example /// ``` From a07b3218faf42897b018ded19d48164562066bf9 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 01:52:16 +0200 Subject: [PATCH 08/26] rand_distr: Add Zipf (not zeta) distribution --- rand_distr/CHANGELOG.md | 2 +- rand_distr/src/lib.rs | 3 +- rand_distr/src/zipf.rs | 214 +++++++++++++++++++++++++++++++++++----- 3 files changed, 195 insertions(+), 24 deletions(-) diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md index b4b09f9404e..09ec411a5b8 100644 --- a/rand_distr/CHANGELOG.md +++ b/rand_distr/CHANGELOG.md @@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased -- New `Zeta` distribution (#1136) +- New `Zeta` and `Zipf` distributions (#1136) ## [0.4.1] - 2021-06-15 - Empirically test PDF of normal distribution (#1121) diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs index 750dffb0e3e..9d4496b4271 100644 --- a/rand_distr/src/lib.rs +++ b/rand_distr/src/lib.rs @@ -76,6 +76,7 @@ //! - [`InverseGaussian`] distribution //! - [`NormalInverseGaussian`] distribution //! - [`Zeta`] distribution +//! - [`Zipf`] distribution #[cfg(feature = "alloc")] extern crate alloc; @@ -116,7 +117,7 @@ pub use self::unit_circle::UnitCircle; pub use self::unit_disc::UnitDisc; pub use self::unit_sphere::UnitSphere; pub use self::weibull::{Error as WeibullError, Weibull}; -pub use self::zipf::{Error as ZetaError, Zeta}; +pub use self::zipf::{ZetaError, Zeta, ZipfError, Zipf}; #[cfg(feature = "alloc")] #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] pub use rand::distributions::{WeightedError, WeightedIndex}; diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index bdb8f9b1b55..2733bc6b35b 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -15,7 +15,7 @@ use core::fmt; /// Samples floating-point numbers according to the zeta distribution. /// -/// The zeta distribution is a limit of the Zipf distribution. +/// The zeta distribution is a limit of the [`Zipf`] distribution. /// /// # Example /// ``` @@ -35,30 +35,30 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution /// Error type returned from `Zeta::new`. #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Error { +pub enum ZetaError { /// `a <= 1` or `nan`. ATooSmall, } -impl fmt::Display for Error { +impl fmt::Display for ZetaError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(match self { - Error::ATooSmall => "a <= 0 or is NaN in Zeta distribution", + ZetaError::ATooSmall => "a <= 1 or is NaN in Zeta distribution", }) } } #[cfg(feature = "std")] #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] -impl std::error::Error for Error {} +impl std::error::Error for ZetaError {} impl Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { /// Construct a new `Zeta` distribution with given `a` parameter. - pub fn new(a: F) -> Result, Error> { + pub fn new(a: F) -> Result, ZetaError> { if !(a > F::one()) { - return Err(Error::ATooSmall); + return Err(ZetaError::ATooSmall); } let a_minus_1 = a - F::one(); let two = F::one() + F::one(); @@ -91,24 +91,135 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution } } +/// Samples floating-point numbers according to the Zipf distribution. +/// +/// The samples follow Zipf's law: The frequency of each sample is inversely +/// proportional to a power of its frequency rank. +/// +/// For large `n`, this converges to the [`Zeta`] distribution. +/// +/// For `s = 0`, this becomes a uniform distribution. +/// +/// # Example +/// ``` +/// use rand::prelude::*; +/// use rand_distr::Zipf; +/// +/// let val: f64 = thread_rng().sample(Zipf::new(10, 1.5).unwrap()); +/// println!("{}", val); +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct Zipf +where F: Float, Standard: Distribution, OpenClosed01: Distribution { + n: F, + s: F, + t: F, +} + +/// Error type returned from `Zipf::new`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ZipfError { + /// `s < 0` or `nan`. + STooSmall, + /// `n < 1`. + NTooSmall, +} + +impl fmt::Display for ZipfError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + ZipfError::STooSmall => "s < 0 or is NaN in Zipf distribution", + ZipfError::NTooSmall => "n < 1 in Zipf distribution", + }) + } +} + +#[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] +impl std::error::Error for ZipfError {} + +impl Zipf +where F: Float, Standard: Distribution, OpenClosed01: Distribution { + /// Construct a new `Zipf` distribution for a set with `n` elements and a + /// frequency rank exponent `s`. + pub fn new(n: u64, s: F) -> Result, ZipfError> { + if !(s >= F::zero()) { + return Err(ZipfError::STooSmall); + } + if n < 1 { + return Err(ZipfError::NTooSmall); + } + let n = F::from(n).unwrap(); + Ok(Zipf { + // FIXME: properly deal with `s == 1` + n, s, t: (n.powf(F::one() - s) - s) / (F::one() - s) + }) + } + + /// Inverse cumulative density function + fn inv_cdf(&self, p: F) -> F { + let one = F::one(); + let pt = p * self.t; + if pt <= one { + pt + } else { + (pt * (one - self.s) + self.s).powf(one / (one - self.s)) + } + } +} + +impl Distribution for Zipf +where F: Float, Standard: Distribution, OpenClosed01: Distribution +{ + fn sample(&self, rng: &mut R) -> F { + let one = F::one(); + loop { + let inv_b = self.inv_cdf(rng.sample(Standard)); + let x = (inv_b + one).floor(); + let numer = x.powf(-self.s); + let denom = if x <= one { + one / self.t + } else { + inv_b.powf(-self.s) / self.t + }; + + let y = rng.sample(Standard); + if y < numer / denom { + return x; + } + } + } +} + #[cfg(test)] mod tests { use super::*; + fn test_samples>( + distr: D, zero: F, expected: &[F], + ) { + let mut rng = crate::test::rng(213); + let mut buf = [zero; 4]; + for x in &mut buf { + *x = rng.sample(&distr); + } + assert_eq!(buf, expected); + } + #[test] #[should_panic] - fn invalid() { + fn zeta_invalid() { Zeta::new(1.).unwrap(); } #[test] #[should_panic] - fn nan() { + fn zeta_nan() { Zeta::new(core::f64::NAN).unwrap(); } #[test] - fn sample() { + fn zeta_sample() { let a = 2.0; let d = Zeta::new(a).unwrap(); let mut rng = crate::test::rng(1); @@ -119,18 +230,7 @@ mod tests { } #[test] - fn value_stability() { - fn test_samples>( - distr: D, zero: F, expected: &[F], - ) { - let mut rng = crate::test::rng(213); - let mut buf = [zero; 4]; - for x in &mut buf { - *x = rng.sample(&distr); - } - assert_eq!(buf, expected); - } - + fn zeta_value_stability() { test_samples(Zeta::new(1.5).unwrap(), 0f32, &[ 1.0, 2.0, 1.0, 1.0, ]); @@ -138,4 +238,74 @@ mod tests { 2.0, 1.0, 1.0, 1.0, ]); } + + #[test] + #[should_panic] + fn zipf_s_too_small() { + Zipf::new(10, -1.).unwrap(); + } + + #[test] + #[should_panic] + fn zipf_n_too_small() { + Zipf::new(0, 1.).unwrap(); + } + + #[test] + #[should_panic] + fn zipf_nan() { + Zipf::new(10, core::f64::NAN).unwrap(); + } + + #[test] + fn zipf_sample() { + let d = Zipf::new(10, 0.5).unwrap(); + let mut rng = crate::test::rng(2); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + } + + #[test] + fn zipf_sample_s_1() { + let d = Zipf::new(10, 1.).unwrap(); + let mut rng = crate::test::rng(2); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + } + + #[test] + fn zipf_sample_s_0() { + let d = Zipf::new(10, 0.).unwrap(); + let mut rng = crate::test::rng(2); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + // TODO: verify that this is a uniform distribution + } + + #[test] + fn zipf_sample_large_n() { + let d = Zipf::new(core::u64::MAX, 1.5).unwrap(); + let mut rng = crate::test::rng(2); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + // TODO: verify that this is a zeta distribution + } + + #[test] + fn zipf_value_stability() { + test_samples(Zipf::new(10, 0.5).unwrap(), 0f32, &[ + 10.0, 2.0, 6.0, 7.0 + ]); + test_samples(Zipf::new(10, 2.0).unwrap(), 0f64, &[ + 1.0, 2.0, 3.0, 2.0 + ]); + } } From 6270248f59ac2f43780febd161442af6d7ef710c Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:11:17 +0200 Subject: [PATCH 09/26] Zipf: Fix `s = 1` special case Also inline the distribution methods. --- rand_distr/src/zipf.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 2733bc6b35b..cfd07a62f9e 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -142,6 +142,7 @@ impl Zipf where F: Float, Standard: Distribution, OpenClosed01: Distribution { /// Construct a new `Zipf` distribution for a set with `n` elements and a /// frequency rank exponent `s`. + #[inline] pub fn new(n: u64, s: F) -> Result, ZipfError> { if !(s >= F::zero()) { return Err(ZipfError::STooSmall); @@ -149,21 +150,28 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { if n < 1 { return Err(ZipfError::NTooSmall); } - let n = F::from(n).unwrap(); + let n = F::from(n).unwrap(); // FIXME + let t = if s != F::one() { + (n.powf(F::one() - s) - s) / (F::one() - s) + } else { + F::one() + n.ln() + }; Ok(Zipf { - // FIXME: properly deal with `s == 1` - n, s, t: (n.powf(F::one() - s) - s) / (F::one() - s) + n, s, t }) } /// Inverse cumulative density function + #[inline] fn inv_cdf(&self, p: F) -> F { let one = F::one(); let pt = p * self.t; if pt <= one { pt - } else { + } else if self.s != F::one() { (pt * (one - self.s) + self.s).powf(one / (one - self.s)) + } else { + pt.exp() } } } @@ -171,6 +179,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { impl Distribution for Zipf where F: Float, Standard: Distribution, OpenClosed01: Distribution { + #[inline] fn sample(&self, rng: &mut R) -> F { let one = F::one(); loop { From 4d67af2604ce2305fc5ca793c9ea9dd1e31ab8e5 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:30:46 +0200 Subject: [PATCH 10/26] Zipf: Mention that rounding may occur --- rand_distr/src/zipf.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index cfd07a62f9e..a5e8711c84f 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -142,6 +142,8 @@ impl Zipf where F: Float, Standard: Distribution, OpenClosed01: Distribution { /// Construct a new `Zipf` distribution for a set with `n` elements and a /// frequency rank exponent `s`. + /// + /// For large `n`, rounding may occur to fit the number into the float type. #[inline] pub fn new(n: u64, s: F) -> Result, ZipfError> { if !(s >= F::zero()) { @@ -150,7 +152,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { if n < 1 { return Err(ZipfError::NTooSmall); } - let n = F::from(n).unwrap(); // FIXME + let n = F::from(n).unwrap(); // This does not fail. let t = if s != F::one() { (n.powf(F::one() - s) - s) / (F::one() - s) } else { From 139e898f0654fa17e3a4948ef1e82067369a87d4 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:32:00 +0200 Subject: [PATCH 11/26] Zipf: Simplify trait bounds --- rand_distr/src/zipf.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index a5e8711c84f..ffe5ac354de 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -110,7 +110,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution /// ``` #[derive(Clone, Copy, Debug)] pub struct Zipf -where F: Float, Standard: Distribution, OpenClosed01: Distribution { +where F: Float, Standard: Distribution { n: F, s: F, t: F, @@ -139,7 +139,7 @@ impl fmt::Display for ZipfError { impl std::error::Error for ZipfError {} impl Zipf -where F: Float, Standard: Distribution, OpenClosed01: Distribution { +where F: Float, Standard: Distribution { /// Construct a new `Zipf` distribution for a set with `n` elements and a /// frequency rank exponent `s`. /// @@ -179,7 +179,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { } impl Distribution for Zipf -where F: Float, Standard: Distribution, OpenClosed01: Distribution +where F: Float, Standard: Distribution { #[inline] fn sample(&self, rng: &mut R) -> F { From f514fd68badcdf72d9249e92157dd26b8e5bef39 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:36:29 +0200 Subject: [PATCH 12/26] Zipf: Simplify calculation of ratio This should improve performance slightly. --- rand_distr/src/zipf.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index ffe5ac354de..a0fbd51cff5 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -187,15 +187,13 @@ where F: Float, Standard: Distribution loop { let inv_b = self.inv_cdf(rng.sample(Standard)); let x = (inv_b + one).floor(); - let numer = x.powf(-self.s); - let denom = if x <= one { - one / self.t - } else { - inv_b.powf(-self.s) / self.t + let mut ratio = x.powf(-self.s) * self.t; + if x > one { + ratio = ratio * inv_b.powf(self.s) }; let y = rng.sample(Standard); - if y < numer / denom { + if y < ratio { return x; } } From ccaa4de99020d83f9693686cbacca4032a22f55b Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:49:18 +0200 Subject: [PATCH 13/26] Zipf: Update benchmarks --- rand_distr/benches/src/distributions.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rand_distr/benches/src/distributions.rs b/rand_distr/benches/src/distributions.rs index 98a1fca0ac7..61c2aa1883c 100644 --- a/rand_distr/benches/src/distributions.rs +++ b/rand_distr/benches/src/distributions.rs @@ -197,7 +197,8 @@ fn bench(c: &mut Criterion) { { let mut g = c.benchmark_group("zipf"); - distr_float!(g, "zipf", f64, Zeta::new(1.5).unwrap()); + distr_float!(g, "zipf", f64, Zipf::new(10, 1.5).unwrap()); + distr_float!(g, "zeta", f64, Zeta::new(1.5).unwrap()); } { From 3cccc64a464dadc8dc6819b508b813184684902c Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:50:58 +0200 Subject: [PATCH 14/26] Zeta: Inline distribution methods --- rand_distr/src/zipf.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index a0fbd51cff5..41d10f4ec0b 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -56,6 +56,7 @@ impl Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { /// Construct a new `Zeta` distribution with given `a` parameter. + #[inline] pub fn new(a: F) -> Result, ZetaError> { if !(a > F::one()) { return Err(ZetaError::ATooSmall); @@ -72,6 +73,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution impl Distribution for Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution { + #[inline] fn sample(&self, rng: &mut R) -> F { // This is based on the numpy implementation. loop { From 14d55f86642bb9e4fc306765bbcc18b570ccb45d Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 16 Jun 2021 02:53:10 +0200 Subject: [PATCH 15/26] Group `Zeta` and `Zipf` with rate-related distributions Arguably, the Zipf distribution is related to the Pareto distribution [1]. [1] https://en.wikipedia.org/wiki/Zipf's_law#Related_laws --- rand_distr/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs index 9d4496b4271..4b9e803a130 100644 --- a/rand_distr/src/lib.rs +++ b/rand_distr/src/lib.rs @@ -56,6 +56,8 @@ //! - [`Poisson`] distribution //! - [`Exp`]onential distribution, and [`Exp1`] as a primitive //! - [`Weibull`] distribution +//! - [`Zeta`] distribution +//! - [`Zipf`] distribution //! - Gamma and derived distributions: //! - [`Gamma`] distribution //! - [`ChiSquared`] distribution @@ -75,8 +77,6 @@ //! - Misc. distributions //! - [`InverseGaussian`] distribution //! - [`NormalInverseGaussian`] distribution -//! - [`Zeta`] distribution -//! - [`Zipf`] distribution #[cfg(feature = "alloc")] extern crate alloc; From 85f55b278afef3c181237e2460626224945df0b9 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 27 Jul 2021 21:33:07 +0200 Subject: [PATCH 16/26] Zeta and Zipf: Improve docs --- rand_distr/src/zipf.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 41d10f4ec0b..2ea2177c5a4 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -13,9 +13,14 @@ use crate::{Distribution, Standard}; use rand::{Rng, distributions::OpenClosed01}; use core::fmt; -/// Samples floating-point numbers according to the zeta distribution. +/// Samples integers according to the [zeta distribution]. /// -/// The zeta distribution is a limit of the [`Zipf`] distribution. +/// The zeta distribution is a limit of the [`Zipf`] distribution. Sometimes it +/// is called one of the following: discrete Pareto, Riemann-Zeta, Zipf, or +/// Zipf–Estoup distribution. +/// +/// It has the density function `f(k) = k^(-a) / C(a)` for `k >= 1`, where `a` +/// is the parameter and `C(a)` is the Riemann zeta function. /// /// # Example /// ``` @@ -25,6 +30,8 @@ use core::fmt; /// let val: f64 = thread_rng().sample(Zeta::new(1.5).unwrap()); /// println!("{}", val); /// ``` +/// +/// [zeta distribution]: https://en.wikipedia.org/wiki/Zeta_distribution #[derive(Clone, Copy, Debug)] pub struct Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution @@ -75,7 +82,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { #[inline] fn sample(&self, rng: &mut R) -> F { - // This is based on the numpy implementation. + // This is based on https://doi.org/10.1007/978-1-4613-8643-8. loop { let u = rng.sample(OpenClosed01); let v = rng.sample(Standard); @@ -93,10 +100,11 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution } } -/// Samples floating-point numbers according to the Zipf distribution. +/// Samples integers according to the Zipf distribution. /// -/// The samples follow Zipf's law: The frequency of each sample is inversely -/// proportional to a power of its frequency rank. +/// The samples follow Zipf's law: The frequency of each sample from a finite +/// set of size `n` is inversely proportional to a power of its frequency rank +/// (with exponent `s`). /// /// For large `n`, this converges to the [`Zeta`] distribution. /// From 2a33433247751fbb6803c879821745bea2175430 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 27 Jul 2021 21:34:52 +0200 Subject: [PATCH 17/26] Zeta: Replace likely impossible if with debug_assert --- rand_distr/src/zipf.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 2ea2177c5a4..7f20634c46a 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -85,14 +85,12 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution // This is based on https://doi.org/10.1007/978-1-4613-8643-8. loop { let u = rng.sample(OpenClosed01); - let v = rng.sample(Standard); let x = u.powf(-F::one() / self.a_minus_1).floor(); - - if x < F::one() { - continue; - } + debug_assert!(x >= F::one()); let t = (F::one() + F::one() / x).powf(self.a_minus_1); + + let v = rng.sample(Standard); if v * x * (t - F::one()) / (self.b - F::one()) <= t / self.b { return x; } From e19349c0cf3d585c8adbb8ef1fac8943d6242399 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 28 Jul 2021 17:39:22 +0200 Subject: [PATCH 18/26] Give credit for implementation details --- rand_distr/src/zipf.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 7f20634c46a..9d981fdc1e1 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -116,6 +116,13 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution /// let val: f64 = thread_rng().sample(Zipf::new(10, 1.5).unwrap()); /// println!("{}", val); /// ``` +/// +/// # Implementation details +/// +/// Implemented via [rejection sampling](https://en.wikipedia.org/wiki/Rejection_sampling), +/// due to Jason Crease[1]. +/// +/// [1]: https://jasoncrease.medium.com/rejection-sampling-the-zipf-distribution-6b359792cffa #[derive(Clone, Copy, Debug)] pub struct Zipf where F: Float, Standard: Distribution { From a746fd2369b1085593f0482ae0baf3ba7d185186 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 28 Jul 2021 18:23:03 +0200 Subject: [PATCH 19/26] Zipf: Fix `inv_cdf` for `s = 1` --- rand_distr/src/zipf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 9d981fdc1e1..dfc4e27c84a 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -188,7 +188,7 @@ where F: Float, Standard: Distribution { } else if self.s != F::one() { (pt * (one - self.s) + self.s).powf(one / (one - self.s)) } else { - pt.exp() + (pt - one).exp() } } } From b053683ee44595fc7850603c7144b56f609ff2bd Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 28 Jul 2021 18:26:45 +0200 Subject: [PATCH 20/26] Zipf: Correctly calculate rejection ratio --- rand_distr/src/zipf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index dfc4e27c84a..a3aeaea2f65 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -202,7 +202,7 @@ where F: Float, Standard: Distribution loop { let inv_b = self.inv_cdf(rng.sample(Standard)); let x = (inv_b + one).floor(); - let mut ratio = x.powf(-self.s) * self.t; + let mut ratio = x.powf(-self.s); if x > one { ratio = ratio * inv_b.powf(self.s) }; From 0f9243c4a642f43d4e9a66b687132f507ff08823 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 28 Jul 2021 19:12:58 +0200 Subject: [PATCH 21/26] Zipf: Add debug_assert for invariant --- rand_distr/src/zipf.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index a3aeaea2f65..14f146c165a 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -173,6 +173,7 @@ where F: Float, Standard: Distribution { } else { F::one() + n.ln() }; + debug_assert!(t > F::zero()); Ok(Zipf { n, s, t }) From e5aff9a10df6580b82a34219e8fb0f5ba78772f1 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Fri, 30 Jul 2021 13:15:58 +0200 Subject: [PATCH 22/26] Zipf: Avoid division inside loop --- rand_distr/src/zipf.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 14f146c165a..f6e71b53542 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -129,6 +129,7 @@ where F: Float, Standard: Distribution { n: F, s: F, t: F, + q: F, } /// Error type returned from `Zipf::new`. @@ -168,14 +169,21 @@ where F: Float, Standard: Distribution { return Err(ZipfError::NTooSmall); } let n = F::from(n).unwrap(); // This does not fail. + let q = if s != F::one() { + // Make sure to calculate the division only once. + F::one() / (F::one() - s) + } else { + // This value is never used. + F::zero() + }; let t = if s != F::one() { - (n.powf(F::one() - s) - s) / (F::one() - s) + (n.powf(F::one() - s) - s) * q } else { F::one() + n.ln() }; debug_assert!(t > F::zero()); Ok(Zipf { - n, s, t + n, s, t, q }) } @@ -186,8 +194,8 @@ where F: Float, Standard: Distribution { let pt = p * self.t; if pt <= one { pt - } else if self.s != F::one() { - (pt * (one - self.s) + self.s).powf(one / (one - self.s)) + } else if self.s != one { + (pt * (one - self.s) + self.s).powf(self.q) } else { (pt - one).exp() } From a32cd08eb4feea86e10f963ef0ae7ba2c248112c Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Fri, 30 Jul 2021 13:21:21 +0200 Subject: [PATCH 23/26] Zeta: Mention algorithm in doc comment --- rand_distr/src/zipf.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index f6e71b53542..5573c628426 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -31,7 +31,13 @@ use core::fmt; /// println!("{}", val); /// ``` /// +/// # Implementation details +/// +/// We are using the algorithm from [Non-Uniform Random Variate Generation], +/// Section 6.1, page 551. +/// /// [zeta distribution]: https://en.wikipedia.org/wiki/Zeta_distribution +/// [Non-Uniform Random Variate Generation]: https://doi.org/10.1007/978-1-4613-8643-8 #[derive(Clone, Copy, Debug)] pub struct Zeta where F: Float, Standard: Distribution, OpenClosed01: Distribution @@ -82,7 +88,6 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution { #[inline] fn sample(&self, rng: &mut R) -> F { - // This is based on https://doi.org/10.1007/978-1-4613-8643-8. loop { let u = rng.sample(OpenClosed01); let x = u.powf(-F::one() / self.a_minus_1).floor(); From 72a63339b78ab270d02a08a191121c6d8b423937 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Fri, 30 Jul 2021 14:29:04 +0200 Subject: [PATCH 24/26] Zeta: Avoid division in rejection criterion --- rand_distr/src/zipf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 5573c628426..84636b881a9 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -96,7 +96,7 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution let t = (F::one() + F::one() / x).powf(self.a_minus_1); let v = rng.sample(Standard); - if v * x * (t - F::one()) / (self.b - F::one()) <= t / self.b { + if v * x * (t - F::one()) * self.b <= t * (self.b - F::one()) { return x; } } From cf4b7e46713b834a61b3c279de09c5a9de697b7a Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Mon, 2 Aug 2021 23:34:14 +0200 Subject: [PATCH 25/26] Zeta: Fix infinite loop for small `a` --- rand_distr/src/zipf.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 84636b881a9..8b012eaa6e1 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -92,6 +92,12 @@ where F: Float, Standard: Distribution, OpenClosed01: Distribution let u = rng.sample(OpenClosed01); let x = u.powf(-F::one() / self.a_minus_1).floor(); debug_assert!(x >= F::one()); + if x.is_infinite() { + // For sufficiently small `a`, `x` will always be infinite, + // which is rejected, resulting in an infinite loop. We avoid + // this by always returning infinity instead. + return x; + } let t = (F::one() + F::one() / x).powf(self.a_minus_1); @@ -267,6 +273,17 @@ mod tests { } } + #[test] + fn zeta_small_a() { + let a = 1. + 1e-15; + let d = Zeta::new(a).unwrap(); + let mut rng = crate::test::rng(2); + for _ in 0..1000 { + let r = d.sample(&mut rng); + assert!(r >= 1.); + } + } + #[test] fn zeta_value_stability() { test_samples(Zeta::new(1.5).unwrap(), 0f32, &[ From fe5a6e10d473056ad1f07b5240db15fab1bcc977 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 3 Aug 2021 23:56:35 +0200 Subject: [PATCH 26/26] Zeta: Document cases where infinity is returned --- rand_distr/src/zipf.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs index 8b012eaa6e1..ad322c4e1b3 100644 --- a/rand_distr/src/zipf.rs +++ b/rand_distr/src/zipf.rs @@ -31,6 +31,14 @@ use core::fmt; /// println!("{}", val); /// ``` /// +/// # Remarks +/// +/// The zeta distribution has no upper limit. Sampled values may be infinite. +/// In particular, a value of infinity might be returned for the following +/// reasons: +/// 1. it is the best representation in the type `F` of the actual sample. +/// 2. to prevent infinite loops for very small `a`. +/// /// # Implementation details /// /// We are using the algorithm from [Non-Uniform Random Variate Generation],