|
1 | 1 | # Unit tests for Distances |
2 | 2 |
|
| 3 | +using SparseArrays: sparsevec, sprand |
| 4 | + |
3 | 5 | struct FooDist <: PreMetric end # Field-less PreMetric subtype. Julia 1.0 compat: struct definitions must be placed at global scope |
4 | 6 |
|
5 | 7 | @testset "result_type" begin |
|
217 | 219 | for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]), |
218 | 220 | ([4.0, 5.0, 6.0, 7.0], [3. 8.; 9. 1.0])) |
219 | 221 | x, y = T.(_x), T.(_y) |
220 | | - for (x, y) in ((x, y), |
| 222 | + for (x, y) in ((x, y), (sparsevec(x), sparsevec(y)), |
221 | 223 | (convert(Array{Union{Missing, T}}, x), convert(Array{Union{Missing, T}}, y)), |
222 | 224 | ((Iterators.take(x, 4), Iterators.take(y, 4))), # iterator |
223 | 225 | (((x[i] for i in 1:length(x)), (y[i] for i in 1:length(y)))), # generator |
@@ -331,7 +333,8 @@ end # testset |
331 | 333 | end #testset |
332 | 334 |
|
333 | 335 | @testset "empty vector" begin |
334 | | - for T in (Float64, F64), (a, b) in ((T[], T[]), (Iterators.take(T[], 0), Iterators.take(T[], 0))) |
| 336 | + for T in (Float64, F64), (a, b) in ((T[], T[]), (Iterators.take(T[], 0), Iterators.take(T[], 0)), |
| 337 | + (sprand(T, 0, .1), sprand(T, 0, .1))) |
335 | 338 | @test sqeuclidean(a, b) == 0.0 |
336 | 339 | @test isa(sqeuclidean(a, b), T) |
337 | 340 | @test euclidean(a, b) == 0.0 |
@@ -391,6 +394,10 @@ end # testset |
391 | 394 | @test_throws DimensionMismatch colwise!(mat23, Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x), mat23, mat22) |
392 | 395 | @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x)([1, 2, 3], [1, 2]) |
393 | 396 | @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> [1, 2])([1, 2, 3], [1, 2, 3]) |
| 397 | + sv1 = sprand(10, .2) |
| 398 | + sv2 = sprand(20, .2) |
| 399 | + @test_throws DimensionMismatch euclidean(sv1, sv2) |
| 400 | + @test_throws DimensionMismatch bhattacharyya(sv1, sv2) |
394 | 401 | end # testset |
395 | 402 |
|
396 | 403 | @testset "Different input types" begin |
@@ -504,41 +511,43 @@ end |
504 | 511 |
|
@testset "bhattacharyya / hellinger" begin
    for T in (Int, Float64, F64)
        dense_x, dense_y = T.([4, 5, 6, 7]), T.([3, 9, 8, 1])
        dense_a = T.([1, 2, 1, 3, 2, 1])
        dense_b = T.([1, 3, 0, 2, 2, 0])
        # Random length-12 vector whose entries below its median are zeroed out.
        dense_p = T == Int ? rand(0:10, 12) : rand(T, 12)
        dense_p[dense_p .< median(dense_p)] .= 0
        dense_q = T == Int ? rand(0:10, 12) : rand(T, 12)
        dense_inputs = (dense_x, dense_y, dense_a, dense_b, dense_p, dense_q)

        # Run identical checks on the dense vectors and on their `sparsevec` conversions.
        for (x, y, a, b, p, q) in (dense_inputs, sparsevec.(dense_inputs))
            # The Bhattacharyya and Hellinger distances are defined for discrete
            # probability distributions, so the reference coefficient is computed
            # from the inputs after scaling each one by its sum.
            x_normed = x ./ sum(x)
            y_normed = y ./ sum(y)
            bc_x_y = sum(sqrt.(x_normed .* y_normed))
            # Check both the arrays themselves and `Iterators.take` wrappers around them.
            for (u, v) in ((x, y), (Iterators.take(x, 12), Iterators.take(y, 12)))
                @test Distances.bhattacharyya_coeff(u, v) ≈ bc_x_y
                @test bhattacharyya(u, v) ≈ (-log(bc_x_y))
                @test hellinger(u, v) ≈ sqrt(1 - bc_x_y)
            end

            # Same comparison for the remaining pairs: first (a, b), then (p, q).
            for (u, v) in ((a, b), (p, q))
                u_normed = u ./ sum(u)
                v_normed = v ./ sum(v)
                bc_u_v = sum(sqrt.(u_normed .* v_normed))
                @test Distances.bhattacharyya_coeff(u, v) ≈ bc_u_v
                @test bhattacharyya(u, v) ≈ (-log(bc_u_v))
                @test hellinger(u, v) ≈ sqrt(1 - bc_u_v)
            end

            # Symmetry: swapping the arguments must not change the distance.
            @test bhattacharyya(x, y) ≈ bhattacharyya(y, x)
        end
    end
end # testset
544 | 553 |
|
|
769 | 778 |
|
770 | 779 | X = rand(ComplexF64, m, nx) |
771 | 780 | Y = rand(ComplexF64, m, ny) |
772 | | - |
| 781 | + |
773 | 782 | test_pairwise(SqEuclidean(), X, Y, Float64) |
774 | 783 | test_pairwise(Euclidean(), X, Y, Float64) |
775 | 784 |
|
|
946 | 955 | @test pairwise(PeriodicEuclidean(p), X, Y, dims=2)[1,2] == 0m |
947 | 956 | end |
948 | 957 |
|
@testset "SparseVector, nnz(a) != nnz(b)" begin
    # Each listed distance must give (approximately) the same value on a pair of
    # sparse vectors as on their dense copies. The two operands are generated with
    # very different densities, in both orders.
    distances = (bhattacharyya, euclidean, sqeuclidean, jaccard, cityblock,
                 totalvariation, chebyshev, braycurtis, hamming)
    for (len, density_a, density_b) in ((100, .1, .8), (200, .8, .1))
        sparse_a = sprand(len, density_a)
        sparse_b = sprand(len, density_b)
        dense_a, dense_b = Vector(sparse_a), Vector(sparse_b)
        for dist in distances
            @test dist(sparse_a, sparse_b) ≈ dist(dense_a, dense_b)
        end
    end
end
| 968 | + |
949 | 969 | #= |
950 | 970 | @testset "zero allocation colwise!" begin |
951 | 971 | d = Euclidean() |
|
0 commit comments