Merge pull request #770 from JuliaAI/dev

ablaom · web-flow · commit cc3dbe5d6261 · 2022-05-18T08:44:51.000+12:00
For a 0.20.3 release
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJBase"
 uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.20.2"
+version = "0.20.3"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
diff --git a/src/resampling.jl b/src/resampling.jl
@@ -462,12 +462,19 @@ outlier detection model.
 When `evaluate`/`evaluate!` is called, a number of train/test pairs
 ("folds") of row indices are generated, according to the options
 provided, which are discussed in the [`evaluate!`](@ref)
-doc-string. Rows correspond to observations.  The train/test pairs
-generated are recorded in the `train_test_rows` field of the
+doc-string. Rows correspond to observations. The generated train/test
+pairs are recorded in the `train_test_rows` field of the
 `PerformanceEvaluation` struct, and the corresponding estimates,
 aggregated over all train/test pairs, are recorded in `measurement`, a
 vector with one entry for each measure (metric) recorded in `measure`.
 
+When displayed, a `PerformanceEvaluation` object includes a value under
+the heading `1.96*SE`, derived from the standard error of the `per_fold`
+entries. This value is suitable for constructing a formal 95%
+confidence interval for the given `measurement`. Such intervals should
+be interpreted with caution. See, for example, Bates et al.
+[(2021)](https://arxiv.org/abs/2104.00673).
+
 ### Fields
 
 These fields are part of the public API of the `PerformanceEvaluation`
@@ -503,8 +510,9 @@ struct.
   machine `mach` training in resampling - one machine per train/test
   pair.
 
-- `train_test_rows`: a vector of tuples, each of the form `(train, test)`, where `train` and `test` 
-   are vectors of row (observation) indices for training and evaluation respectively. 
+- `train_test_rows`: a vector of tuples, each of the form `(train, test)`,
+  where `train` and `test` are vectors of row (observation) indices for
+  training and evaluation respectively.
 """
 struct PerformanceEvaluation{M,
                              Measurement,
@@ -532,18 +540,35 @@ _short(v::Vector{<:Real}) = MLJBase.short_string(v)
 _short(v::Vector) = string("[", join(_short.(v), ", "), "]")
 _short(::Missing) = missing
 
-function Base.show(io::IO, ::MIME"text/plain", e::PerformanceEvaluation)
-    _measure =  map(e.measure) do m
-        repr(MIME("text/plain"), m)
+# Return `1.96*SE` for each entry of `e.per_fold`, or `[nothing]` if there is one fold.
+function _standard_errors(e::PerformanceEvaluation)
+    factor = 1.96 # For the 95% confidence interval.
+    nfolds = length(e.per_fold[1])
+    nfolds == 1 && return [nothing]
+    std_errors = map(e.per_fold) do per_fold
+        factor * std(per_fold) / sqrt(nfolds) # `std` is n-1 corrected: SE = s/sqrt(n)
+    end
+    return std_errors
+end
+
+function Base.show(io::IO, ::MIME"text/plain", e::PerformanceEvaluation)
+    _measure = [repr(MIME("text/plain"), m) for m in e.measure]
     _measurement = round3.(e.measurement)
     _per_fold = [round3.(v) for v in e.per_fold]
+    _sterr = round3.(_standard_errors(e))
+
+    # Only show the standard error if the number of folds is higher than 1.
+    show_sterr = any(!isnothing, _sterr)
+    data = show_sterr ?
+        hcat(_measure, e.operation, _measurement, _sterr, _per_fold) :
+        hcat(_measure, e.operation, _measurement, _per_fold)
+    header = show_sterr ?
+        ["measure", "operation", "measurement", "1.96*SE", "per_fold"] :
+        ["measure", "operation", "measurement", "per_fold"]
 
-    data = hcat(_measure, _measurement, e.operation, _per_fold)
-    header = ["measure", "measurement", "operation", "per_fold"]
     println(io, "PerformanceEvaluation object "*
             "with these fields:")
-    println(io, "  measure, measurement, operation, per_fold,\n"*
+    println(io, "  measure, operation, measurement, per_fold,\n"*
             "  per_observation, fitted_params_per_fold,\n"*
             "  report_per_fold, train_test_rows")
     println(io, "Extract:")
diff --git a/test/preliminaries.jl b/test/preliminaries.jl
@@ -0,0 +1,66 @@
+using MLJBase
+if !MLJBase.TESTING
+    error(
+        "To test MLJBase, the environment variable "*
+        "`TEST_MLJBASE` must be set to `\"true\"`\n"*
+        "You can do this in the REPL with `ENV[\"TEST_MLJBASE\"]=\"true\"`"
+    )
+end
+
+using Distributed
+# Thanks to https://stackoverflow.com/a/70895939/5056635 for the exeflags tip.
+addprocs(; exeflags="--project=$(Base.active_project())")
+
+@info "nprocs() = $(nprocs())"
+@static if VERSION >= v"1.3.0-DEV.573"
+    import .Threads
+    @info "nthreads() = $(Threads.nthreads())"
+else
+    @info "Running julia $(VERSION). Multithreading tests excluded. "
+end
+
+@everywhere begin
+    using MLJModelInterface
+    using MLJBase
+    using Test
+    using CategoricalArrays
+    using Logging
+    using ComputationalResources
+    using StableRNGs
+end
+
+import TypedTables
+using Tables
+
+function include_everywhere(filepath) # include on this process, then on each worker
+    include(filepath) # Node 1 goes first, which triggers any precompile
+    nprocs() > 1 || return nothing
+    fullpath = joinpath(@__DIR__, filepath)
+    @sync for pid in workers()
+        @async remotecall_wait(include, pid, fullpath)
+    end
+    return nothing
+end
+
+include("test_utilities.jl")
+
+# load Models module containing model implementations for testing:
+print("Loading some models for testing...")
+include_everywhere("_models/models.jl")
+print("\r                                           \r")
+
+# enable conditional testing of modules by providing test_args
+# e.g. `Pkg.test("MLJBase", test_args=["misc"])`
+RUN_ALL_TESTS = isempty(ARGS)
+macro conditional_testset(name, expr) # wrap `expr` in a testset that can be filtered
+    name = string(name) # used as the testset label and matched against `ARGS`
+    esc(quote
+        if RUN_ALL_TESTS || $name in ARGS # run if no filter given, or if requested
+            @testset $name $expr
+        end
+    end)
+end
+
+# To avoid printing `@conditional_testset (macro with 1 method)`
+# when loading this file via `include("test/preliminaries.jl")`.
+nothing
diff --git a/test/resampling.jl b/test/resampling.jl
@@ -775,12 +775,21 @@ end
     @test T <: PerformanceEvaluation
 
     show_text = sprint(show, MIME"text/plain"(), evaluations)
+    cols = ["measure", "operation", "measurement", "1.96*SE", "per_fold"]
+    @test all(contains.(show_text, cols))
+    print(show_text)
     docstring_text = string(@doc(PerformanceEvaluation))
     for fieldname in fieldnames(PerformanceEvaluation)
         @test contains(show_text, string(fieldname))
         # string(text::Markdown.MD) converts `-` list items to `*`.
         @test contains(docstring_text, " * `$fieldname`")
     end
+
+    measures = [LogLoss(), Accuracy()]
+    evaluations = evaluate(clf, X, y; measures, resampling=Holdout())
+    show_text = sprint(show, MIME"text/plain"(), evaluations)
+    print(show_text)
+    @test !contains(show_text, "std")
 end
 
 #end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,65 +1,19 @@
-using Distributed
-addprocs()
-
-
-using MLJBase
-if !MLJBase.TESTING
-    error(
-        "To test MLJBase, the environment variable "*
-        "`TEST_MLJBASE` must be set to `\"true\"`\n"*
-        "You can do this in the REPL with `ENV[\"TEST_MLJBASE\"]=\"true\"`"
-    )
-end
-
-@info "nprocs() = $(nprocs())"
-@static if VERSION >= v"1.3.0-DEV.573"
-    import .Threads
-    @info "nthreads() = $(Threads.nthreads())"
-else
-    @info "Running julia $(VERSION). Multithreading tests excluded. "
-end
-
-@everywhere begin
-    using MLJModelInterface
-    using MLJBase
-    using Test
-    using CategoricalArrays
-    using Logging
-    using ComputationalResources
-    using StableRNGs
-end
-
-import TypedTables
-using Tables
-
-function include_everywhere(filepath)
-    include(filepath) # Load on Node 1 first, triggering any precompile
-    if nprocs() > 1
-        fullpath = joinpath(@__DIR__, filepath)
-        @sync for p in workers()
-            @async remotecall_wait(include, p, fullpath)
-        end
-    end
-end
-
-include("test_utilities.jl")
-
-# load Models module containing model implementations for testing:
-print("Loading some models for testing...")
-include_everywhere("_models/models.jl")
-print("\r                                           \r")
-
-# enable conditional testing of modules by providing test_args
-# e.g. `Pkg.test("MLJBase", test_args=["misc"])`
-RUN_ALL_TESTS = isempty(ARGS)
-macro conditional_testset(name, expr)
-    name = string(name)
-    esc(quote
-        if RUN_ALL_TESTS || $name in ARGS
-            @testset $name $expr
-        end
-    end)
-end
+# To speed up the development workflow, use `TestEnv`.
+# For example:
+# ```
+# $ julia --project
+#
+# julia> ENV["TEST_MLJBASE"] = "true"
+#
+# julia> using TestEnv; TestEnv.activate()
+#
+# julia> include("test/preliminaries.jl")
+# [...]
+#
+# julia> include("test/resampling.jl")
+# [...]
+# ```
+include("preliminaries.jl")
 
 @conditional_testset "misc" begin
     @test include("utilities.jl")