Skip to content

Commit 2339424

Browse files
authored
Merge pull request #55 from alan-turing-institute/hackmat
Hackmat (addresses #49)
2 parents fe7e105 + 22bc21e commit 2339424

File tree

5 files changed

+61
-12
lines changed

5 files changed

+61
-12
lines changed

src/ScientificTypes.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export autotype
1010

1111
using Tables, CategoricalArrays, ColorTypes
1212

13+
const CategoricalElement{U} = Union{CategoricalValue{<:Any,U},CategoricalString{U}}
14+
1315
# ## FOR DEFINING SCITYPES ON OBJECTS DETECTED USING TRAITS
1416

1517
# We define a "dynamically" extended function `trait`:

src/conventions/mlj/finite.jl

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,19 @@ function _finalize_finite_coerce(v, verbosity, T2)
2222
return categorical(v, true, ordered=T2<:Union{Missing,OrderedFactor})
2323
end
2424

25+
# HACK: The following method (10 lines) can be removed after resolution
26+
# of https://github.com/JuliaData/CategoricalArrays.jl/issues/226:
27+
function _finalize_finite_coerce(v::CategoricalArray, verbosity, T2)
28+
su = scitype_union(v)
29+
if su >: Missing && !(T2 >: Missing)
30+
verbosity > 0 && _coerce_missing_warn(T2)
31+
end
32+
if su <: T2
33+
return v
34+
end
35+
return ordered!(compress(v), T2<:Union{Missing,OrderedFactor})
36+
end
37+
2538
# if v is not a CategoricalArray:
2639
function coerce(v::AbstractArray,
2740
::Type{T2}; verbosity=1) where T2<:Union{Missing,Finite}
@@ -35,14 +48,9 @@ coerce(v::CategoricalArray,
3548
::Type{T2}; verbosity=1) where T2<:Union{Missing,Finite} =
3649
_finalize_finite_coerce(v, verbosity, T2)
3750

38-
# if v is a CategoricalArray{Any}
51+
# if v is a CategoricalArray{Any} (a bit of a hack):
3952
function coerce(v::CategoricalArray{Any},
4053
::Type{T2}; verbosity=1) where T2<:Union{Missing,Finite}
41-
42-
# AFTER CategoricalArrays 0.7.2 IS RELEASED:
43-
# return _finalize_finite_coerce(broadcast(identity, v), verbosity, T2)
44-
45-
# TEMPORARY HACK:
4654
levels_ = levels(v)
4755
isordered_ = isordered(v)
4856
vraw = broadcast(get_, v)

src/conventions/mlj/mlj.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
scitype(::AbstractFloat, ::Val{:mlj}) = Continuous
22
scitype(::Integer, ::Val{:mlj}) = Count
33

4-
_coerce_missing_warn(T) =
5-
@warn "Missing values encountered coercing scitype to $T.\n"*
6-
"Coerced to Union{Missing,$T} instead. "
7-
4+
function _coerce_missing_warn(::Type{T}) where T
5+
T >: Missing || @warn "Missing values encountered coercing scitype to $T.\n"*
6+
"Coerced to Union{Missing,$T} instead. "
7+
end
88

99
# ## IMPLEMENT PERFORMANCE BOOSTING FOR ARRAYS
1010

@@ -27,6 +27,9 @@ function coerce(y::AbstractArray{<:Union{Missing,Real}},
2727
return float(y)
2828
end
2929

30+
_float(y::CategoricalElement) = float(_int(y))
31+
_float(y) = float(y)
32+
3033
# NOTE: case where the data may have been badly encoded and resulted
3134
# in an Any[] array. A user should proceed with caution here, in
3235
# particular: - if at one point it encounters a type for which there
@@ -39,14 +42,15 @@ function coerce(y::AbstractArray, T::Type{<:Union{Missing,Continuous}}; verbosit
3942
has_chars = findfirst(e->isa(e,Char), y) !== nothing
4043
has_chars && verbosity > 0 && @warn "Char values will be coerced to " *
4144
"AbstractFloat (e.g. 'A' to 65.0)."
42-
return float.(y)
45+
return _float.(y)
4346
end
4447

4548

4649
## COERCE ARRAY TO COUNT
4750

4851
_int(::Missing) = missing
4952
_int(x::Integer) = x
53+
_int(x::CategoricalElement) = CategoricalArrays.order(x.pool)[x.level]
5054
_int(x) = Int(x) # may throw InexactError
5155

5256
# no-op case

src/tables.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ function _coerce_col(X, name, types; args...)
3636
Xcol = Tables.columntable(X)
3737
y = getproperty(X, name)
3838
if haskey(types, name)
39+
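# HACK: y may be a LazyArrays.ApplyArray (see issue #49); such a column is detected by its `f` and `args` properties and converted to a Vector before coercion.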
40+
props = propertynames(y)
41+
if :f in props && :args in props
42+
y = convert(Vector, y)
43+
end
3944
return coerce(y, types[name]; args...)
4045
else
4146
return y

test/runtests.jl

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,16 @@ end
169169
@test scitype_union(coerce([:x, :y], Finite)) === Multiclass{2}
170170
@test scitype_union(@test_logs((:warn, r"Missing values encountered"),
171171
coerce([:x, :y, missing], Finite))) ===
172-
Union{Missing, Multiclass{2}}
172+
Union{Missing, Multiclass{2}}
173+
174+
# More finite conversions (to check resolution of #48):
175+
y = categorical([1, 2, 3, missing]) # unordered
176+
yc = coerce(y, OrderedFactor)
177+
@test isordered(yc)
178+
@test yc[1].pool.ordered
179+
scitype(y) == AbstractVector{OrderedFactor{2}}
180+
scitype_union(y) == OrderedFactor{2}
181+
173182
end
174183

175184
@testset "coercion works for arrays too" begin
@@ -215,4 +224,25 @@ end
215224
@test eltype(v2c) <: CategoricalValue{Char}
216225
end
217226

227+
@testset "Cat->Count,Continuous (mlj)" begin
228+
a = categorical(["a","b","a","b",missing])
229+
a1 = coerce(a, Union{Count,Missing})
230+
@test scitype_union(a1) == Union{Missing,Count}
231+
@test all(skipmissing(a1 .== [1, 2, 1, 2, missing]))
232+
a1 = coerce(a, Union{Continuous,Missing})
233+
@test scitype_union(a1) == Union{Missing,Continuous}
234+
@test all(skipmissing(a1 .== [1., 2., 1., 2., missing]))
235+
236+
# XXX
237+
238+
y = categorical(1:10, ordered=true)
239+
new_order = [4, 10, 9, 7, 6, 2, 8, 3, 1, 5]
240+
levels!(y, new_order)
241+
@test all(coerce(y, Count) .== sortperm(new_order))
242+
@test all(coerce(y, Count) .== [9, 6, 8, 1, 10, 5, 4, 7, 3, 2])
243+
244+
y = categorical([1:10..., missing, 11], ordered=true)
245+
@test all(skipmissing(coerce(y, Union{Continuous, Missing}) .== float.([1:10...,missing,11])))
246+
end
247+
218248
include("autotype.jl")

0 commit comments

Comments
 (0)