diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 0592530b8..714548d5d 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -17,14 +17,103 @@ Compute Spearman's rank correlation coefficient. If `x` and `y` are vectors, the output is a float, otherwise it's a matrix corresponding to the pairwise correlations of the columns of `x` and `y`. """ -corspearman(x::RealVector, y::RealVector) = cor(tiedrank(x), tiedrank(y)) +function corspearman(x::RealVector, y::RealVector) + n = length(x) + n == length(y) || throw(DimensionMismatch("vectors must have same length")) + (any(isnan, x) || any(isnan, y)) && return NaN + return cor(tiedrank(x), tiedrank(y)) +end + +function corspearman(X::RealMatrix, y::RealVector) + size(X, 1) == length(y) || + throw(DimensionMismatch("X and y have inconsistent dimensions")) + n = size(X, 2) + C = Matrix{Float64}(I, n, 1) + any(isnan, y) && return fill!(C, NaN) + yrank = tiedrank(y) + for j = 1:n + Xj = view(X, :, j) + if any(isnan, Xj) + C[j,1] = NaN + else + Xjrank = tiedrank(Xj) + C[j,1] = cor(Xjrank, yrank) + end + end + return C +end + +function corspearman(x::RealVector, Y::RealMatrix) + size(Y, 1) == length(x) || + throw(DimensionMismatch("x and Y have inconsistent dimensions")) + n = size(Y, 2) + C = Matrix{Float64}(I, 1, n) + any(isnan, x) && return fill!(C, NaN) + xrank = tiedrank(x) + for j = 1:n + Yj = view(Y, :, j) + if any(isnan, Yj) + C[1,j] = NaN + else + Yjrank = tiedrank(Yj) + C[1,j] = cor(xrank, Yjrank) + end + end + return C +end -corspearman(X::RealMatrix, Y::RealMatrix) = - cor(mapslices(tiedrank, X, dims=1), mapslices(tiedrank, Y, dims=1)) -corspearman(X::RealMatrix, y::RealVector) = cor(mapslices(tiedrank, X, dims=1), tiedrank(y)) -corspearman(x::RealVector, Y::RealMatrix) = cor(tiedrank(x), mapslices(tiedrank, Y, dims=1)) +function corspearman(X::RealMatrix) + n = size(X, 2) + C = Matrix{Float64}(I, n, n) + anynan = Vector{Bool}(undef, n) + for j = 1:n + Xj = view(X, :, j) + anynan[j] = any(isnan, Xj) + if anynan[j] + C[:,j] .= NaN + C[j,:] .= NaN + C[j,j] = 1 + continue + end + Xjrank = tiedrank(Xj) + for i = 1:(j-1) + Xi = view(X, :, i) + if anynan[i] + C[i,j] = C[j,i] = NaN + else + Xirank = tiedrank(Xi) + C[i,j] = C[j,i] = cor(Xjrank, Xirank) + end + end + end + return C +end -corspearman(X::RealMatrix) = (Z = mapslices(tiedrank, X, dims=1); cor(Z, Z)) +function corspearman(X::RealMatrix, Y::RealMatrix) + size(X, 1) == size(Y, 1) || + throw(ArgumentError("number of columns in each array must match")) + nr = size(X, 2) + nc = size(Y, 2) + C = Matrix{Float64}(undef, nr, nc) + for j = 1:nr + Xj = view(X, :, j) + if any(isnan, Xj) + C[j,:] .= NaN + continue + end + Xjrank = tiedrank(Xj) + for i = 1:nc + Yi = view(Y, :, i) + if any(isnan, Yi) + C[j,i] = NaN + else + Yirank = tiedrank(Yi) + C[j,i] = cor(Xjrank, Yirank) + end + end + end + return C +end ####################################### @@ -44,7 +133,7 @@ function corkendall!(x::RealVector, y::RealVector, permx::AbstractVector{<:Integ # Initial sorting permute!(x, permx) permute!(y, permx) - + # Use widen to avoid overflows on both 32bit and 64bit npairs = div(widen(n) * (n - 1), 2) ntiesx = ndoubleties = nswaps = widen(0) diff --git a/test/rankcorr.jl b/test/rankcorr.jl index 6110d4c41..93b64449b 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -2,10 +2,11 @@ using StatsBase using Test X = Float64[1 0; 2 1; 3 0; 4 1; 5 10] +Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10] x1 = X[:,1] x2 = X[:,2] -y = [5, 3, 4, 2, 5] +y = Y[:,1] # corspearman @@ -23,6 +24,9 @@ c22 = corspearman(x2, x2) @test corspearman(X, X) ≈ [c11 c12; c12 c22] @test corspearman(X) ≈ [c11 c12; c12 c22] +@test corspearman(X, Y) == + [corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)] + # corkendall # Check error, handling of NaN, Inf etc @@ -106,3 +110,52 @@ w = repeat(z, n) StatsBase.midpoint(1,10) == 5 StatsBase.midpoint(1,widen(10)) == 5 + + +# NaN handling + +Xnan = copy(X) +Xnan[1,1] = NaN +Ynan = copy(Y) +Ynan[2,1] = NaN + +for f in (corspearman, corkendall) + @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) + @test all(isnan, f([1.0, NaN], [1 2; 3 4])) + @test all(isnan, f([1 2; 3 4], [1.0, NaN])) + @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) + @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) + @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + + @test isequal(f(Xnan, Ynan), + [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) + @test isequal(f(Xnan), + [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) + for i in axes(Xnan, 2), j in axes(Xnan, 2)]) + for k in 1:2 + @test isequal(f(Xnan[:,k], Ynan), + [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) + # TODO: fix corkendall (PR#659) + if f === corspearman + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) + else + @test isequal(f(Xnan, Ynan[:,k]), + [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)]) + end + end +end + + +# Wrong dimensions + +@test_throws DimensionMismatch corspearman([1], [1, 2]) +@test_throws DimensionMismatch corspearman([1], [1 2; 3 4]) +@test_throws DimensionMismatch corspearman([1 2; 3 4], [1]) +@test_throws ArgumentError corspearman([1 2; 3 4: 4 6], [1 2; 3 4]) + +# TODO: fix corkendall to match corspearman (PR#659) +@test_throws ErrorException corkendall([1], [1, 2]) +@test_throws ErrorException corkendall([1], [1 2; 3 4]) +@test_throws ErrorException corkendall([1 2; 3 4], [1]) +@test_throws ArgumentError corkendall([1 2; 3 4: 4 6], [1 2; 3 4]) \ No newline at end of file