Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use biased autocorrelation estimate by default (resolves #1785) #1877

Merged
merged 3 commits into from
Oct 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions numpyro/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,13 @@ def _fft_next_fast_len(target):
target += 1


def autocorrelation(x, axis=0):
def autocorrelation(x, axis=0, bias=True):
"""
Computes the autocorrelation of samples at dimension ``axis``.

:param numpy.ndarray x: the input array.
:param int axis: the dimension to calculate autocorrelation.
:param bias: whether to use a biased estimator.
:return: autocorrelation of ``x``.
:rtype: numpy.ndarray
"""
Expand All @@ -127,25 +128,32 @@ def autocorrelation(x, axis=0):

# truncate and normalize the result, then transpose back to original shape
autocorr = autocorr[..., :N]
autocorr = autocorr / np.arange(N, 0.0, -1)

# the unbiased estimator is known to have "wild" tails, due to few samples at longer lags.
# see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly
unbiased when the mean is known, whereas we estimate it from samples here.
if not bias:
autocorr = autocorr / np.arange(N, 0.0, -1)

with np.errstate(invalid="ignore", divide="ignore"):
autocorr = autocorr / autocorr[..., :1]
return np.swapaxes(autocorr, axis, -1)


def autocovariance(x, axis=0):
def autocovariance(x, axis=0, bias=True):
"""
Computes the autocovariance of samples at dimension ``axis``.

:param numpy.ndarray x: the input array.
:param int axis: the dimension to calculate autocovariance.
:param bias: whether to use a biased estimator.
:return: autocovariance of ``x``.
:rtype: numpy.ndarray
"""
return autocorrelation(x, axis) * x.var(axis=axis, keepdims=True)
return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True)


def effective_sample_size(x):
def effective_sample_size(x, bias=True):
"""
Computes effective sample size of input ``x``, where the first dimension of
``x`` is chain dimension and the second dimension of ``x`` is draw dimension.
Expand All @@ -158,6 +166,7 @@ def effective_sample_size(x):
Stan Development Team

:param numpy.ndarray x: the input array.
:param bias: whether to use a biased estimator of the autocovariance.
:return: effective sample size of ``x``.
:rtype: numpy.ndarray
"""
Expand All @@ -166,7 +175,7 @@ def effective_sample_size(x):
assert x.shape[1] >= 2

# find autocovariance for each chain at lag k
gamma_k_c = autocovariance(x, axis=1)
gamma_k_c = autocovariance(x, axis=1, bias=bias)

# find autocorrelation at lag k (from Stan reference)
var_within, var_estimator = _compute_chain_variance_stats(x)
Expand Down
20 changes: 16 additions & 4 deletions test/test_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_, assert_allclose
import pytest
from scipy.fftpack import next_fast_len

Expand Down Expand Up @@ -60,14 +60,26 @@ def test_hpdi():

def test_autocorrelation():
x = np.arange(10.0)
actual = autocorrelation(x)
actual = autocorrelation(x, bias=False)
expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45])
assert_allclose(actual, expected, atol=0.01)

actual = autocorrelation(x, bias=True)
expected = expected * np.arange(len(x), 0.0, -1) / len(x)
assert_allclose(actual, expected, atol=0.01)

# the unbiased estimator has variance O(1) at large lags
x = np.random.normal(size=20000)
ac = autocorrelation(x, bias=False)
assert_(np.any(np.abs(ac[-100:]) > 0.1))

ac = autocorrelation(x, bias=True)
assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01)


def test_autocovariance():
x = np.arange(10.0)
actual = autocovariance(x)
actual = autocovariance(x, bias=False)
expected = np.array(
[8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25]
)
Expand All @@ -93,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin():

def test_effective_sample_size():
x = np.arange(1000.0).reshape(100, 10)
assert_allclose(effective_sample_size(x), 52.64, atol=0.01)
assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)
Loading