Skip to content

Commit

Permalink
Covariance-related functions for general AbstractArray
Browse files Browse the repository at this point in the history
  • Loading branch information
yha committed Oct 15, 2020
1 parent c5328b1 commit be9a11b
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 51 deletions.
59 changes: 38 additions & 21 deletions src/cov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# auxiliary functions

function _symmetrize!(a::DenseMatrix)
function _symmetrize!(a::AbstractMatrix)
m, n = size(a)
m == n || error("a must be a square matrix.")
for j = 1:n
Expand All @@ -15,20 +15,31 @@ function _symmetrize!(a::DenseMatrix)
return a
end

function _scalevars(x::DenseMatrix, s::AbstractWeights, dims::Int)
function _symmetrize!(a::AbstractSparseMatrix)
m, n = size(a)
m == n || error("a must be a square matrix.")
for (i,j,vl) in zip(findnz(a)...)
i > j || continue
vr = a[j,i]
a[i,j] = a[j,i] = middle(vl, vr)
end
return a
end

function _scalevars(x::AbstractMatrix, s::AbstractWeights, dims::Int)
dims == 1 ? Diagonal(s) * x :
dims == 2 ? x * Diagonal(s) :
error("dims should be either 1 or 2.")
end

## scatter matrix

_unscaled_covzm(x::DenseMatrix, dims::Colon) = unscaled_covzm(x)
_unscaled_covzm(x::DenseMatrix, dims::Integer) = unscaled_covzm(x, dims)
_unscaled_covzm(x::AbstractMatrix, dims::Colon) = unscaled_covzm(x)
_unscaled_covzm(x::AbstractMatrix, dims::Integer) = unscaled_covzm(x, dims)

_unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Colon) =
_unscaled_covzm(x::AbstractMatrix, wv::AbstractWeights, dims::Colon) =
_symmetrize!(unscaled_covzm(x, _scalevars(x, wv)))
_unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Integer) =
_unscaled_covzm(x::AbstractMatrix, wv::AbstractWeights, dims::Integer) =
_symmetrize!(unscaled_covzm(x, _scalevars(x, wv, dims), dims))

"""
Expand Down Expand Up @@ -75,30 +86,29 @@ Finally, bias correction is applied to the covariance calculation if
"""
function mean_and_cov end

scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) =
scattermat(x::AbstractMatrix; mean=nothing, dims::Int=1) =
_scattermatm(x, mean, dims)
_scattermatm(x::DenseMatrix, ::Nothing, dims::Int) =
_scattermatm(x::AbstractMatrix, ::Nothing, dims::Int) =
_unscaled_covzm(x .- mean(x, dims=dims), dims)
_scattermatm(x::DenseMatrix, mean, dims::Int=1) =
_scattermatm(x::AbstractMatrix, mean, dims::Int=1) =
_unscaled_covzm(x .- mean, dims)

scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) =
scattermat(x::AbstractMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) =
_scattermatm(x, wv, mean, dims)
_scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) =
_scattermatm(x::AbstractMatrix, wv::AbstractWeights, ::Nothing, dims::Int) =
_unscaled_covzm(x .- mean(x, wv, dims=dims), wv, dims)
_scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) =
_scattermatm(x::AbstractMatrix, wv::AbstractWeights, mean, dims::Int) =
_unscaled_covzm(x .- mean, wv, dims)

## weighted cov
covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1;
covm(x::AbstractMatrix, mean, w::AbstractWeights, dims::Int=1;
corrected::DepBool=nothing) =
rmul!(scattermat(x, w, mean=mean, dims=dims), varcorrection(w, depcheck(:covm, corrected)))


cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::DepBool=nothing) =
cov(x::AbstractMatrix, w::AbstractWeights, dims::Int=1; corrected::DepBool=nothing) =
covm(x, mean(x, w, dims=dims), w, dims; corrected=depcheck(:cov, corrected))

function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1)
function corm(x::AbstractMatrix, mean, w::AbstractWeights, vardim::Int=1)
c = covm(x, mean, w, vardim; corrected=false)
s = stdm(x, w, mean, vardim; corrected=false)
cov2cor!(c, s)
Expand All @@ -110,19 +120,24 @@ end
Compute the Pearson correlation matrix of `X` along the dimension
`dims` with a weighting `w` .
"""
cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) =
cor(x::AbstractMatrix, w::AbstractWeights, dims::Int=1) =
corm(x, mean(x, w, dims=dims), w, dims)

function mean_and_cov(x::DenseMatrix, dims::Int=1; corrected::Bool=true)
function mean_and_cov(x::AbstractMatrix, dims::Int=1; corrected::Bool=true)
m = mean(x, dims=dims)
return m, covm(x, m, dims, corrected=corrected)
end
function mean_and_cov(x::DenseMatrix, wv::AbstractWeights, dims::Int=1;
function mean_and_cov(x::AbstractMatrix, wv::AbstractWeights, dims::Int=1;
corrected::DepBool=nothing)
m = mean(x, wv, dims=dims)
return m, cov(x, wv, dims; corrected=depcheck(:mean_and_cov, corrected))
end

function mean_and_cov(x::AbstractVector; corrected::Bool=true)
m = mean(x)
return m, covm(x, m, corrected=corrected)
end

"""
cov2cor(C, s)
Expand All @@ -148,8 +163,10 @@ standard deviations `s`.
function cor2cov!(C::AbstractMatrix, s::AbstractArray)
n = length(s)
size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions"))
for i in CartesianIndices(size(C))
@inbounds C[i] *= s[i[1]] * s[i[2]]
@inbounds for i in CartesianIndices(size(C))
si = s[i[1]] * s[i[2]]
# the covariance is 0 when si==0, although C[i] is NaN in this case
C[i] = iszero(si) ? zero(eltype(C)) : C[i] * si
end
return C
end
Expand Down
80 changes: 50 additions & 30 deletions test/cov.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
using StatsBase
using LinearAlgebra, Random, Test
using SparseArrays

struct EmptyCovarianceEstimator <: CovarianceEstimator end

struct WrappedArray{T,N,A} <: AbstractArray{T,N}
a::A
WrappedArray(a::AbstractArray{T,N}) where {T,N} = new{T,N,typeof(a)}(a)
end
Base.size(w::WrappedArray) = size(w.a)
Base.getindex(w::WrappedArray{T,N}, I::Vararg{Int, N}) where {T,N} = getindex(w.a, I...)
Base.setindex!(w::WrappedArray{T,N}, v, I::Vararg{Int, N}) where {T,N} = setindex!(w.a, v, I...)

(x,y) = isapprox(x, y; nans=true)

@testset "StatsBase.Covariance" begin
weight_funcs = (weights, aweights, fweights, pweights)
array = randn(3, 8)
wrapped_array = WrappedArray(array)
sparse_array = sprandn(3, 8, 0.2)

@testset "$f" for f in weight_funcs
X = randn(3, 8)
@testset "$f,$(typeof(X))" for f in weight_funcs, X in (array, wrapped_array, sparse_array)

Z1 = X .- mean(X, dims = 1)
Z2 = X .- mean(X, dims = 2)
Expand Down Expand Up @@ -87,27 +100,32 @@ weight_funcs = (weights, aweights, fweights, pweights)
@testset "Mean and covariance" begin
(m, C) = mean_and_cov(X; corrected=false)
@test m == mean(X, dims=1)
@test C == cov(X, dims=1, corrected=false)
@test C cov(X, dims=1, corrected=false)

(m, C) = mean_and_cov(X, 1; corrected=false)
@test m == mean(X, dims=1)
@test C == cov(X, dims=1, corrected = false)
@test C cov(X, dims=1, corrected = false)

(m, C) = mean_and_cov(X, 2; corrected=false)
@test m == mean(X, dims=2)
@test C == cov(X, dims=2, corrected = false)
@test C cov(X, dims=2, corrected = false)

(m, C) = mean_and_cov(X, wv1; corrected=false)
@test m == mean(X, wv1, dims=1)
@test C == cov(X, wv1, 1, corrected=false)
@test C cov(X, wv1, 1, corrected=false)

(m, C) = mean_and_cov(X, wv1, 1; corrected=false)
@test m == mean(X, wv1, dims=1)
@test C == cov(X, wv1, 1, corrected=false)
@test C cov(X, wv1, 1, corrected=false)

(m, C) = mean_and_cov(X, wv2, 2; corrected=false)
@test m == mean(X, wv2, dims=2)
@test C == cov(X, wv2, 2, corrected=false)
@test C cov(X, wv2, 2, corrected=false)

v = collect(eachcol(X))
(m, C) = mean_and_cov(v; corrected=false)
@test m == mean(v)
@test C cov(v, corrected=false)
end
@testset "Conversions" begin
std1 = std(X, wv1, 1; corrected=false)
Expand All @@ -119,11 +137,12 @@ weight_funcs = (weights, aweights, fweights, pweights)
cor1 = cor(X, wv1, 1)
cor2 = cor(X, wv2, 2)


@testset "cov2cor" begin
@test cov2cor(cov(X, dims = 1), std(X, dims = 1)) cor(X, dims = 1)
@test cov2cor(cov(X, dims = 2), std(X, dims = 2)) cor(X, dims = 2)
@test cov2cor(cov1, std1) cor1
@test cov2cor(cov2, std2) cor2
@test cov2cor(cov(X, dims = 1), std(X, dims = 1)) cor(X, dims = 1)
@test cov2cor(cov(X, dims = 2), std(X, dims = 2)) cor(X, dims = 2)
@test cov2cor(cov1, std1) cor1
@test cov2cor(cov2, std2) cor2
end
@testset "cor2cov" begin
@test cor2cov(cor(X, dims = 1), std(X, dims = 1)) cov(X, dims = 1)
Expand Down Expand Up @@ -158,30 +177,30 @@ weight_funcs = (weights, aweights, fweights, pweights)
@testset "Mean and covariance" begin
(m, C) = mean_and_cov(X; corrected=true)
@test m == mean(X, dims=1)
@test C == cov(X, dims=1, corrected = true)
@test C cov(X, dims=1, corrected = true)

(m, C) = mean_and_cov(X, 1; corrected=true)
@test m == mean(X, dims=1)
@test C == cov(X, dims=1, corrected = true)
@test C cov(X, dims=1, corrected = true)

(m, C) = mean_and_cov(X, 2; corrected=true)
@test m == mean(X, dims=2)
@test C == cov(X, dims=2, corrected = true)
@test C cov(X, dims=2, corrected = true)

if isa(wv1, Weights)
@test_throws ArgumentError mean_and_cov(X, wv1; corrected=true)
else
(m, C) = mean_and_cov(X, wv1; corrected=true)
@test m == mean(X, wv1, dims=1)
@test C == cov(X, wv1, 1; corrected=true)
@test C cov(X, wv1, 1; corrected=true)

(m, C) = mean_and_cov(X, wv1, 1; corrected=true)
@test m == mean(X, wv1, dims=1)
@test C == cov(X, wv1, 1; corrected=true)
@test C cov(X, wv1, 1; corrected=true)

(m, C) = mean_and_cov(X, wv2, 2; corrected=true)
@test m == mean(X, wv2, dims=2)
@test C == cov(X, wv2, 2; corrected=true)
@test C cov(X, wv2, 2; corrected=true)
end
end
@testset "Conversions" begin
Expand All @@ -196,25 +215,26 @@ weight_funcs = (weights, aweights, fweights, pweights)
cor2 = cor(X, wv2, 2)

@testset "cov2cor" begin
@test cov2cor(cov(X, dims = 1), std(X, dims = 1)) cor(X, dims = 1)
@test cov2cor(cov(X, dims = 2), std(X, dims = 2)) cor(X, dims = 2)
@test cov2cor(cov1, std1) cor1
@test cov2cor(cov2, std2) cor2
@test cov2cor(cov(X, dims = 1), std(X, dims = 1)) cor(X, dims = 1)
@test cov2cor(cov(X, dims = 2), std(X, dims = 2)) cor(X, dims = 2)
@test cov2cor(cov1, std1) cor1
@test cov2cor(cov2, std2) cor2
end

@testset "cov2cor!" begin
tmp_cov1 = copy(cov1)
@test !(tmp_cov1 cor1)
@test !(tmp_cov1 cor1)
StatsBase.cov2cor!(tmp_cov1, std1)
@test tmp_cov1 cor1
@test tmp_cov1 cor1

tmp_cov2 = copy(cov2)
@test !(tmp_cov2 cor2)
@test !(tmp_cov2 cor2)
StatsBase.cov2cor!(tmp_cov2, std2)
@test tmp_cov2 cor2
@test tmp_cov2 cor2
end

@testset "cor2cov" begin

@test cor2cov(cor(X, dims = 1), std(X, dims = 1)) cov(X, dims = 1)
@test cor2cov(cor(X, dims = 2), std(X, dims = 2)) cov(X, dims = 2)
@test cor2cov(cor1, std1) cov1
Expand All @@ -237,8 +257,8 @@ weight_funcs = (weights, aweights, fweights, pweights)
end

@testset "Correlation" begin
@test cor(X, f(ones(3)), 1) cor(X, dims = 1)
@test cor(X, f(ones(8)), 2) cor(X, dims = 2)
@test cor(X, f(ones(3)), 1) cor(X, dims = 1)
@test cor(X, f(ones(8)), 2) cor(X, dims = 2)

cov1 = cov(X, wv1, 1; corrected=false)
std1 = std(X, wv1, 1; corrected=false)
Expand All @@ -247,8 +267,8 @@ weight_funcs = (weights, aweights, fweights, pweights)
expected_cor1 = StatsBase.cov2cor!(cov1, std1)
expected_cor2 = StatsBase.cov2cor!(cov2, std2)

@test cor(X, wv1, 1) expected_cor1
@test cor(X, wv2, 2) expected_cor2
@test cor(X, wv1, 1) expected_cor1
@test cor(X, wv2, 2) expected_cor2
end

@testset "Abstract covariance estimation" begin
Expand Down

0 comments on commit be9a11b

Please sign in to comment.