Skip to content

Commit d2c6983

Browse files
authored
Use Löffler's recurrence relation for the Wilcox distribution (#184)
1 parent 058cd82 commit d2c6983

File tree

2 files changed

+85
-51
lines changed

2 files changed

+85
-51
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "StatsFuns"
22
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
3-
version = "1.4.0"
3+
version = "1.5.0"
44

55
[deps]
66
HypergeometricFunctions = "34004b35-14d8-5ef3-9330-4cdb6864b03a"

src/distrs/wilcox.jl

Lines changed: 84 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,90 @@
11
#=
2-
The wilcox distribution is equivalent to the problem
3-
of finding the number of subsets with size nx
4-
of {1,2,...,nx,nx+1,...,nx+ny} summing to (U+sum(1:nx))
5-
relative to the total number of subsets with size nx.
6-
The empty subset is defined to sum to zero.
7-
This can be calculated using the recursion:
8-
either nx+ny is in the subset in which case we need to calculate
9-
the number of subsets with size nx-1 of {1,2,...,nx,nx+1,...,nx+ny-1}
10-
summing to (U+sum(1:nx)-nx-ny),
11-
or nx+ny is not in the subset in which case we need to calculate
12-
the number of subsets with size nx of {1,2,...,nx,nx+1,...,nx+ny-1}
13-
summing to (U+sum(1:nx)),
14-
This can be calculated bottom up using dynamic programming.
15-
16-
The (i,j)'th element of DP in the k'th outer loop iteration represents:
17-
the number of subsets with size k of {1,2,...,k,k+1,...,k+j-1} summing to i+sum(1:k)-1.
18-
=#
19-
20-
@inline function wilcoxDP(nx, ny, U)
21-
DP = zeros(Int, U + 1, ny + 1)
22-
for j in 1:(ny + 1)
23-
DP[1, j] = 1
24-
end
25-
for k in 1:nx
26-
for j in 2:(ny + 1)
27-
i_max = min(U, k * (j - 1)) + 1
28-
for i in i_max:-1:max(j, 2)
29-
# In this loop: i_max >= i >= 2 AND i - j >= 0
30-
DP[i, j] = DP[i - j + 1, j] + DP[i, j - 1]
31-
end
32-
for i in min(i_max, j - 1):-1:2
33-
# In this loop: i_max >= i >= 2 AND i - j < 0
34-
DP[i, j] = DP[i, j - 1]
35-
end
2+
Compute the number of sequences of `nx` 0s and `ny` 1s in each of which a 1 precedes a 0 `U` times.
3+
4+
## Details
5+
6+
Due to symmetry, we only have to consider the case `nx ≤ ny`.
7+
Let `m = min(nx, ny)` and `n = max(nx, ny)`, and denote the number of sequences of `m` 0s and `n` 1s
8+
in which a 1 precedes a 0 `U` times by `pₘ,ₙ(U)`.
9+
10+
Mann and Whitney (1947) computed `pₘ,ₙ(U)` by exploiting the recurrence relation
11+
12+
pₘ,ₙ(U) = pₘ₋₁,ₙ(U - n) + pₘ,ₙ₋₁(U)
13+
14+
It can be obtained by considering the sequence with the last element removed:
15+
1. If the last element is a 0, it is preceded by `n` 1s;
16+
and hence in the sequence with the last element removed, consisting of `m - 1` 0s and `n` 1s, a 1 must precede a 0 `U - n` times.
17+
2. If the last element is a 1, it does not precede any 0;
18+
and hence in the sequence with the last element removed, consisting of `m` 0s and `n - 1` 1s, a 1 must preced a 0 `U` times.
19+
20+
Löffler (1983) published the recurrence relation
21+
22+
pₘ,ₙ(U) = 1/U \sum_{a = 0}^{U - 1} σₘ,ₙ(U - a) pₘ,ₙ(a)
23+
24+
where
25+
26+
σₘ,ₙ(k) := \sum_{d | k} ϵₘ,ₙ(d) d
27+
28+
with ϵₘ,ₙ(d) = 1 for 1 ≤ d ≤ m, ϵₘ,ₙ(d) = -1 for n < d ≤ m + n, and ϵₘ,ₙ(d) = 0 otherwise.
29+
Implementation of this recurrence relation allows faster computations with fewer memory allocations.
30+
31+
## References
32+
33+
H. B. Mann, D. R. Whitney. "On a Test of Whether one of Two Random Variables is Stochastically Larger than the Other." Ann. Math. Statist. 18 (1) 50 - 60, March, 1947. https://doi.org/10.1214/aoms/1177730491
34+
A. Löffler: "Über eine Partition der nat. Zahlen und ihre Anwendung beim U-Test." Wissenschaftliche Zeitschrift der Martin-Luther-Universität Halle-Wittenberg; Mathematisch-Naturwissenschaftliche Reihe, XXXII'83 M, Heft 5, 87–89; available as https://upload.wikimedia.org/wikipedia/commons/f/f5/LoefflerWilcoxonMannWhitneyTest.pdf
35+
=#
36+
37+
@inline function wilcox_partitions(nx::Int, ny::Int, U::Int)
38+
# This internal function expects 0 <= U <= nx * ny / 2
39+
if !(0 <= U <= (nx * ny) / 2)
40+
throw(ArgumentError("`wilcox_partitions(nx, ny, U)` is only implemented for 0 <= U <= (nx * ny) / 2"))
41+
end
42+
43+
# Due to symmetry, `wilcox_partitions(nx, ny, U) = wilcox_partitions(ny, nx, U)`
44+
# Hence for simplicity we only consider the case `wilcox_partitions(min(nx, ny), max(nx, ny), U)` here
45+
m, n = minmax(nx, ny)
46+
47+
# Compute σ(k) = ∑_{d|k} ϵ(d) d where
48+
# - ϵ(d) := 1 for 1 ≤ d ≤ m
49+
# - ϵ(d) := -1 for n < d ≤ m + n
50+
# - ϵ(d) := 0 otherwise
51+
sigmas = zeros(Int, U)
52+
for d in 1:m
53+
for i in d:d:U
54+
sigmas[i] += d
3655
end
3756
end
38-
return DP
57+
for d in (n + 1):(m + n)
58+
for i in d:d:U
59+
sigmas[i] -= d
60+
end
61+
end
62+
63+
# Recursively compute the number of partitions pₘ,ₙ(a) for 0 <= a <= U
64+
partitions = Vector{Int}(undef, U + 1)
65+
partitions[1] = 1
66+
for a in 1:U
67+
p = 0
68+
for i in 1:a
69+
p += partitions[i] * sigmas[a + 1 - i]
70+
end
71+
partitions[a + 1] = p ÷ a
72+
end
73+
74+
return partitions
3975
end
4076

4177
function wilcoxpdf(nx::Int, ny::Int, U::Float64)
4278
return isinteger(U) ? wilcoxpdf(nx, ny, Int(U)) : 0.0
4379
end
4480
function wilcoxpdf(nx::Int, ny::Int, U::Int)
45-
if U < 0
46-
return 0.0
47-
end
4881
max_U = nx * ny
49-
U2 = max_U - U
50-
if U2 < U
51-
return wilcoxpdf(nx, ny, U2)
82+
if !(0 <= U <= max_U)
83+
return 0.0
5284
end
53-
DP = wilcoxDP(nx, ny, U)
54-
return DP[U + 1, ny + 1] / binomial(nx + ny, nx)
85+
U = min(U, max_U - U)
86+
partitions = wilcox_partitions(nx, ny, U)
87+
return partitions[end] / binomial(nx + ny, nx)
5588
end
5689

5790
function wilcoxlogpdf(nx::Int, ny::Int, U::Union{Float64, Int})
@@ -62,16 +95,17 @@ function wilcoxcdf(nx::Int, ny::Int, U::Float64)
6295
return wilcoxcdf(nx, ny, round(Int, U, RoundNearestTiesUp))
6396
end
6497
function wilcoxcdf(nx::Int, ny::Int, U::Int)
98+
max_U = nx * ny
6599
if U < 0
66100
return 0.0
101+
elseif U >= max_U
102+
return 1.0
103+
else
104+
U2 = max_U - U - 1
105+
partitions = wilcox_partitions(nx, ny, min(U, U2))
106+
p = sum(float, partitions) / binomial(nx + ny, nx)
107+
return U2 < U ? 1.0 - p : p
67108
end
68-
max_U = nx * ny
69-
U2 = max_U - U - 1
70-
if U2 < U
71-
return 1.0 - wilcoxcdf(nx, ny, U2)
72-
end
73-
DP = wilcoxDP(nx, ny, U)
74-
return sum(float, @view(DP[1:(U + 1), ny + 1])) / binomial(nx + ny, nx)
75109
end
76110

77111
function wilcoxlogcdf(nx::Int, ny::Int, U::Union{Float64, Int})

0 commit comments

Comments
 (0)