Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poisson: Fix undefined behavior and support f64 output #795

Merged
merged 9 commits into from
May 16, 2019
64 changes: 33 additions & 31 deletions rand_distr/src/poisson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
//! The Poisson distribution.

use rand::Rng;
use crate::{Distribution, Cauchy};
use crate::utils::log_gamma;
use crate::{Distribution, Cauchy, Standard};
use crate::utils::{log_gamma, Float};

/// The Poisson distribution `Poisson(lambda)`.
///
Expand All @@ -28,13 +28,13 @@ use crate::utils::log_gamma;
/// println!("{} is from a Poisson(2) distribution", v);
/// ```
#[derive(Clone, Copy, Debug)]
pub struct Poisson {
lambda: f64,
pub struct Poisson<N> {
lambda: N,
// precalculated values
exp_lambda: f64,
log_lambda: f64,
sqrt_2lambda: f64,
magic_val: f64,
exp_lambda: N,
log_lambda: N,
sqrt_2lambda: N,
magic_val: N,
}

/// Error type returned from `Poisson::new`.
Expand All @@ -44,43 +44,45 @@ pub enum Error {
ShapeTooSmall,
}

impl Poisson {
impl<N: Float> Poisson<N> {
vks marked this conversation as resolved.
Show resolved Hide resolved
/// Construct a new `Poisson` with the given shape parameter
/// `lambda`.
pub fn new(lambda: f64) -> Result<Poisson, Error> {
if !(lambda > 0.0) {
pub fn new(lambda: N) -> Result<Poisson<N>, Error> {
if !(lambda > N::from(0.0)) {
return Err(Error::ShapeTooSmall);
}
let log_lambda = lambda.ln();
Ok(Poisson {
lambda,
exp_lambda: (-lambda).exp(),
log_lambda,
sqrt_2lambda: (2.0 * lambda).sqrt(),
magic_val: lambda * log_lambda - log_gamma(1.0 + lambda),
sqrt_2lambda: (N::from(2.0) * lambda).sqrt(),
magic_val: lambda * log_lambda - log_gamma(N::from(1.0) + lambda),
})
}
}

impl Distribution<f64> for Poisson {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
impl<N: Float> Distribution<N> for Poisson<N>
where Standard: Distribution<N>
{
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> N {
// using the algorithm from Numerical Recipes in C

// for low expected values use the Knuth method
if self.lambda < 12.0 {
let mut result = 0.;
let mut p = 1.0;
if self.lambda < N::from(12.0) {
let mut result = N::from(0.);
let mut p = N::from(1.0);
while p > self.exp_lambda {
p *= rng.gen::<f64>();
result += 1.;
p *= rng.gen::<N>();
result += N::from(1.);
}
result - 1.
result - N::from(1.)
}
// high expected values - rejection method
else {
// we use the Cauchy distribution as the comparison distribution
// f(x) ~ 1/(1+x^2)
let cauchy = Cauchy::new(0.0, 1.0).unwrap();
let cauchy = Cauchy::new(N::from(0.0), N::from(1.0)).unwrap();
let mut result;

loop {
Expand All @@ -92,7 +94,7 @@ impl Distribution<f64> for Poisson {
// shift the peak of the comparison ditribution
result = self.sqrt_2lambda * comp_dev + self.lambda;
// repeat the drawing until we are in the range of possible values
if result >= 0.0 {
if result >= N::from(0.0) {
break;
}
}
Expand All @@ -104,11 +106,11 @@ impl Distribution<f64> for Poisson {
// the magic value scales the distribution function to a range of approximately 0-1
// since it is not exact, we multiply the ratio by 0.9 to avoid ratios greater than 1
// this doesn't change the resulting distribution, only increases the rate of failed drawings
let check = 0.9 * (1.0 + comp_dev * comp_dev)
* (result * self.log_lambda - log_gamma(1.0 + result) - self.magic_val).exp();
let check = N::from(0.9) * (N::from(1.0) + comp_dev * comp_dev)
* (result * self.log_lambda - log_gamma(N::from(1.0) + result) - self.magic_val).exp();

// check with uniform random value - if below the threshold, we are within the target distribution
if rng.gen::<f64>() <= check {
if rng.gen::<N>() <= check {
break;
}
}
Expand All @@ -117,12 +119,12 @@ impl Distribution<f64> for Poisson {
}
}

impl Distribution<u64> for Poisson {
impl<N: Float> Distribution<u64> for Poisson<N>
where Standard: Distribution<N>
{
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
vks marked this conversation as resolved.
Show resolved Hide resolved
let result: f64 = self.sample(rng);
assert!(result >= 0.);
assert!(result <= ::core::u64::MAX as f64);
result as u64
let result: N = self.sample(rng);
result.into_ui().unwrap()
}
}

Expand Down
48 changes: 34 additions & 14 deletions rand_distr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ pub trait Float: Copy + Sized + cmp::PartialOrd
fn pi() -> Self;
/// Support approximate representation of a f64 value
fn from(x: f64) -> Self;
/// Support converting to an unsigned integer.
fn into_ui(self) -> Option<u64>;
vks marked this conversation as resolved.
Show resolved Hide resolved

/// Take the absolute value of self
fn abs(self) -> Self;
/// Take the largest integer less than or equal to self
fn floor(self) -> Self;

/// Take the exponential of self
fn exp(self) -> Self;
Expand All @@ -53,8 +57,16 @@ pub trait Float: Copy + Sized + cmp::PartialOrd
impl Float for f32 {
fn pi() -> Self { core::f32::consts::PI }
fn from(x: f64) -> Self { x as f32 }
fn into_ui(self) -> Option<u64> {
if self >= 0. && self <= ::core::u64::MAX as f32 {
Some(self as u64)
} else {
None
}
}

fn abs(self) -> Self { self.abs() }
fn floor(self) -> Self { self.floor() }

fn exp(self) -> Self { self.exp() }
fn ln(self) -> Self { self.ln() }
Expand All @@ -67,8 +79,16 @@ impl Float for f32 {
impl Float for f64 {
fn pi() -> Self { core::f64::consts::PI }
fn from(x: f64) -> Self { x }
fn into_ui(self) -> Option<u64> {
if self >= 0. && self <= ::core::u64::MAX as f64 {
Some(self as u64)
} else {
None
}
}

fn abs(self) -> Self { self.abs() }
fn floor(self) -> Self { self.floor() }

fn exp(self) -> Self { self.exp() }
fn ln(self) -> Self { self.ln() }
Expand All @@ -91,33 +111,33 @@ impl Float for f64 {
/// `Ag(z)` is an infinite series with coefficients that can be calculated
/// ahead of time - we use just the first 6 terms, which is good enough
/// for most purposes.
pub(crate) fn log_gamma(x: f64) -> f64 {
pub(crate) fn log_gamma<N: Float>(x: N) -> N {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not an optimal f32 implementation and I'm not sure it's even an acceptable one (e.g. the a parameter below rounds to 1). I would suggest just using f64 arithmetic and converting at function start/end, though there's very little point to supporting Poisson<f32> in this case (only really for the type deduction).

Additionally, this function prototype is okay for internal use but not if we wished to expose log_gamma since we could not add correct implementations for other float types without specialization. A trait would be better, though clunky since we only have one method.

Statrs's Gamma function implementations are likely better.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It essentially evaluates a polynomial, so I think it should be fine, even if the coefficients have less precision. I thinks it is more likely that too many coefficients are used for the target precision.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

though there's very little point to supporting Poisson in this case (only really for the type deduction).

It might help for sampling from the uniform distribution.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this help with the uniform distribution?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are doing some rejection sampling for the Poisson distribution, sampling f32 instead of f64 in the loop might help.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you mean speed up usage of uniform, not help implement uniform. Yes, and when we get a specialisation of log_gamma it may speed things up for any(?) situation where f32 is faster than f64.

// precalculated 6 coefficients for the first 6 terms of the series
let coefficients: [f64; 6] = [
76.18009172947146,
-86.50532032941677,
24.01409824083091,
-1.231739572450155,
0.1208650973866179e-2,
-0.5395239384953e-5,
let coefficients: [N; 6] = [
N::from(76.18009172947146),
N::from(-86.50532032941677),
N::from(24.01409824083091),
N::from(-1.231739572450155),
N::from(0.1208650973866179e-2),
N::from(-0.5395239384953e-5),
];

// (x+0.5)*ln(x+g+0.5)-(x+g+0.5)
let tmp = x + 5.5;
let log = (x + 0.5) * tmp.ln() - tmp;
let tmp = x + N::from(5.5);
let log = (x + N::from(0.5)) * tmp.ln() - tmp;

// the first few terms of the series for Ag(x)
let mut a = 1.000000000190015;
let mut a = N::from(1.000000000190015);
let mut denom = x;
for coeff in &coefficients {
denom += 1.0;
for &coeff in &coefficients {
denom += N::from(1.0);
a += coeff / denom;
}

// get everything together
// a is Ag(x)
// 2.5066... is sqrt(2pi)
log + (2.5066282746310005 * a / x).ln()
log + (N::from(2.5066282746310005) * a / x).ln()
}

/// Sample a random number using the Ziggurat method (specifically the
Expand Down