Skip to content

Commit 609f260

Browse files
authored
Merge pull request #72 from alan-turing-institute/standardizer
Migration, standardizer and a few fixes
2 parents 3aca3d7 + 21d99e4 commit 609f260

19 files changed

+1123
-1119
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
2222
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
2323

2424
[compat]
25-
MLJBase = "0.5"
25+
MLJBase = "0.6"
2626
julia = "1"
2727

2828
[extras]

src/Clustering.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
module Clustering_
77

88
import MLJBase
9+
import MLJBase: @mlj_model
910
using ScientificTypes
1011

1112
import ..Clustering # strange syntax for lazy-loading
@@ -15,8 +16,6 @@ using LinearAlgebra: norm
1516

1617
const C = Clustering
1718

18-
import ..@mlj_model
19-
2019
const KMeansDescription =
2120
"""
2221
K-Means algorithm: find K centroids corresponding to K clusters in the data.

src/MLJModels.jl

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,25 @@
11
module MLJModels
22

3+
using MLJBase, Tables, ScientificTypes
4+
using Requires, Pkg.TOML, OrderedCollections
5+
using StatsBase # countmap is required in metadata
6+
37
# for administrators to update Metadata.toml:
48
export @update
59

6-
# from builtins/Transformers.jl:
7-
export StaticTransformer, FillImputer, FeatureSelector,
8-
UnivariateStandardizer, Standardizer,
9-
UnivariateBoxCoxTransformer,
10-
OneHotEncoder
11-
12-
# from builtins/Constant.jl:
13-
export ConstantRegressor, ConstantClassifier
14-
15-
# from builtins/KNN.jl:
16-
export KNNRegressor
17-
1810
# from loading.jl:
1911
export load, @load, info
2012

2113
# from model_search.jl:
2214
export models, localmodels
2315

24-
using Requires
25-
using OrderedCollections
26-
using MLJBase
27-
using ScientificTypes
28-
using Tables
29-
using ColorTypes
30-
using StatsBase
16+
# from builtins/Constant.jl:
17+
export ConstantRegressor, ConstantClassifier,
18+
DeterministicConstantRegressor, DeterministicConstantClassifier
3119

32-
using Pkg.TOML
20+
# from builtins/Transformers.jl:
21+
export FeatureSelector, StaticTransformer, UnivariateStandardizer,
22+
Standardizer, UnivariateBoxCoxTransformer, OneHotEncoder, FillImputer
3323

3424
const srcdir = dirname(@__FILE__) # the directory containing this file
3525

@@ -40,25 +30,32 @@ if VERSION < v"1.3"
4030
end
4131
nonmissing = nonmissingtype
4232

43-
44-
include("metadata_utils.jl")
45-
4633
include("metadata.jl")
4734
include("model_search.jl")
4835
include("loading.jl")
4936
include("registry/src/Registry.jl")
5037
import .Registry.@update
5138

5239
# load built-in models:
53-
include("builtins/Transformers.jl")
5440
include("builtins/Constant.jl")
5541

56-
include("parameters_utils.jl")
57-
include("metadata_utils.jl")
42+
include("builtins/Transformers.jl")
43+
44+
const INFO_GIVEN_HANDLE = Dict{Handle,Any}()
45+
const PKGS_GIVEN_NAME = Dict{String,Vector{String}}()
46+
const AMBIGUOUS_NAMES = String[]
47+
const NAMES = String[]
5848

49+
# lazily load in strap-on model interfaces for external packages:
5950
function __init__()
51+
metadata_file = joinpath(srcdir, "registry", "Metadata.toml")
52+
53+
merge!(INFO_GIVEN_HANDLE, info_given_handle(metadata_file))
54+
merge!(PKGS_GIVEN_NAME, pkgs_given_name(INFO_GIVEN_HANDLE))
55+
append!(AMBIGUOUS_NAMES, ambiguous_names(INFO_GIVEN_HANDLE))
56+
append!(NAMES, model_names(INFO_GIVEN_HANDLE))
57+
@info "Model metadata loaded from registry. "
6058

61-
# lazily load in strap-on model interfaces for external packages:
6259
@require MultivariateStats="6f286f6a-111f-5878-ab1e-185364afe411" include("MultivariateStats.jl")
6360
@require DecisionTree="7806a523-6efd-50cb-b5f6-3fa6f1930dbb" include("DecisionTree.jl")
6461
@require GaussianProcesses="891a1506-143c-57d2-908e-e1f8e92e6de9" include("GaussianProcesses.jl")
@@ -69,7 +66,6 @@ function __init__()
6966
@require XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" include("XGBoost.jl")
7067
@require LIBSVM="b1bec4e5-fd48-53fe-b0cb-9723c09d164b" include("LIBSVM.jl")
7168
@require NearestNeighbors="b8a86587-4115-5ab1-83bc-aa920d37bbce" include("NearestNeighbors.jl")
72-
7369
end
7470

7571
end # module

src/MultivariateStats.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module MultivariateStats_
33
export RidgeRegressor, PCA, KernelPCA, ICA
44

55
import MLJBase
6+
import MLJBase: @mlj_model
67
using ScientificTypes
78
using Tables
89

@@ -15,8 +16,6 @@ struct LinearFitresult{F} <: MLJBase.MLJType
1516
bias::F
1617
end
1718

18-
import ..@mlj_model
19-
2019
####
2120
#### RIDGE
2221
####

src/NearestNeighbors.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
module NearestNeighbors_
22

33
import MLJBase
4+
import MLJBase: @mlj_model, metadata_model, metadata_pkg
45
using Distances
56

67
import ..NearestNeighbors
7-
import ..@mlj_model, ..metadata_pkg, ..metadata_model
88

99
const NN = NearestNeighbors
1010

src/ScikitLearn/ScikitLearn.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ module ScikitLearn_
22

33
#> for all Supervised models:
44
import MLJBase
5+
import MLJBase: @mlj_model, metadata_model,
6+
_process_model_def, _model_constructor, _model_cleaner
57
using ScientificTypes
68
using Tables
79

@@ -23,9 +25,6 @@ import ..ScikitLearn
2325

2426
include("svm.jl")
2527

26-
import .._process_model_def, .._model_constructor, .._model_cleaner
27-
import ..metadata_model # metadata_pkg is handled by @sk_model
28-
2928
const Option{T} = Union{Nothing, T}
3029

3130
const SKLM = ((ScikitLearn.Skcore).pyimport("sklearn.linear_model"))

src/builtins/Constant.jl

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,10 @@
1-
# this file defines *and* loads one module
2-
31
module Constant
42

5-
export ConstantRegressor, ConstantClassifier
6-
export DeterministicConstantRegressor, DeterministicConstantClassifier
3+
using ..MLJBase, ..Tables
4+
using Distributions
75

8-
import MLJBase
9-
import MLJBase.nrows
10-
import Distributions
11-
using StatsBase
12-
using Statistics
13-
using CategoricalArrays
14-
using ScientificTypes
6+
export ConstantRegressor, ConstantClassifier,
7+
DeterministicConstantRegressor, DeterministicConstantClassifier
158

169
## THE CONSTANT REGRESSOR
1710

@@ -23,7 +16,7 @@ probability distribution best fitting the training target data. Use
2316
`predict_mean` to predict the mean value instead.
2417
2518
"""
26-
struct ConstantRegressor{D} <: MLJBase.Probabilistic
19+
struct ConstantRegressor{D} <: MLJBase.Probabilistic
2720
distribution_type::Type{D}
2821
end
2922
function ConstantRegressor(; distribution_type=Distributions.Normal)
@@ -62,8 +55,8 @@ MLJBase.package_uuid(::Type{<:ConstantRegressor}) =
6255
"d491faf4-2d78-11e9-2867-c94bc002c0b7"
6356
MLJBase.package_url(::Type{<:ConstantRegressor}) = "https://github.com/alan-turing-institute/MLJModels.jl"
6457
MLJBase.is_pure_julia(::Type{<:ConstantRegressor}) = true
65-
MLJBase.input_scitype(::Type{<:ConstantRegressor}) = Table(Scientific) # anything goes
66-
MLJBase.target_scitype(::Type{<:ConstantRegressor}) = AbstractVector{Continuous}
58+
MLJBase.input_scitype(::Type{<:ConstantRegressor}) = MLJBase.Table(MLJBase.Scientific) # anything goes
59+
MLJBase.target_scitype(::Type{<:ConstantRegressor}) = AbstractVector{MLJBase.Continuous}
6760

6861

6962
## THE CONSTANT DETERMINISTIC REGRESSOR (FOR TESTING)
@@ -85,8 +78,8 @@ MLJBase.package_name(::Type{<:DeterministicConstantRegressor}) = MLJBase.package
8578
# Package UUID trait: delegate to `ConstantRegressor`'s UUID, as the neighbouring
# `package_url` trait does. (Previously this returned the package URL by mistake.)
MLJBase.package_uuid(::Type{<:DeterministicConstantRegressor}) = MLJBase.package_uuid(ConstantRegressor)
8679
MLJBase.package_url(::Type{<:DeterministicConstantRegressor}) = MLJBase.package_url(ConstantRegressor)
8780
MLJBase.is_pure_julia(::Type{<:DeterministicConstantRegressor}) = true
88-
MLJBase.input_scitype(::Type{<:DeterministicConstantRegressor}) = Table(Scientific) # anything goes
89-
MLJBase.target_scitype(::Type{<:DeterministicConstantRegressor}) = AbstractVector{Continuous}
81+
MLJBase.input_scitype(::Type{<:DeterministicConstantRegressor}) = MLJBase.Table(MLJBase.Scientific) # anything goes
82+
MLJBase.target_scitype(::Type{<:DeterministicConstantRegressor}) = AbstractVector{MLJBase.Continuous}
9083

9184

9285
## THE CONSTANT CLASSIFIER
@@ -104,7 +97,7 @@ obtain the training target mode instead.
10497
struct ConstantClassifier <: MLJBase.Probabilistic end
10598

10699
function MLJBase.fit(model::ConstantClassifier,
107-
verbosity::Int, X, y)
100+
verbosity::Int, X, y)
108101

109102
fitresult = Distributions.fit(MLJBase.UnivariateFinite, y)
110103

@@ -127,8 +120,8 @@ MLJBase.package_name(::Type{<:ConstantClassifier}) = MLJBase.package_name(Consta
127120
MLJBase.package_uuid(::Type{<:ConstantClassifier}) = MLJBase.package_uuid(ConstantRegressor)
128121
MLJBase.package_url(::Type{<:ConstantClassifier}) = MLJBase.package_url(ConstantRegressor)
129122
MLJBase.is_pure_julia(::Type{<:ConstantClassifier}) = true
130-
MLJBase.input_scitype(::Type{<:ConstantClassifier}) = Table(Scientific) # anything goes
131-
MLJBase.target_scitype(::Type{<:ConstantClassifier}) = AbstractVector{<:Finite}
123+
MLJBase.input_scitype(::Type{<:ConstantClassifier}) = MLJBase.Table(MLJBase.Scientific) # anything goes
124+
MLJBase.target_scitype(::Type{<:ConstantClassifier}) = AbstractVector{<:MLJBase.Finite}
132125

133126

134127
## DETERMINISTIC CONSTANT CLASSIFIER (FOR TESTING)
@@ -161,13 +154,9 @@ MLJBase.package_name(::Type{<:DeterministicConstantClassifier}) = MLJBase.packag
161154
MLJBase.package_uuid(::Type{<:DeterministicConstantClassifier}) = MLJBase.package_uuid(ConstantRegressor)
162155
MLJBase.package_url(::Type{<:DeterministicConstantClassifier}) = MLJBase.package_url(ConstantRegressor)
163156
MLJBase.is_pure_julia(::Type{<:DeterministicConstantClassifier}) = true
164-
MLJBase.input_scitype(::Type{<:DeterministicConstantClassifier}) = Table(Scientific) # anything goes
165-
MLJBase.target_scitype(::Type{<:DeterministicConstantClassifier}) = AbstractVector{<:Finite}
166-
157+
MLJBase.input_scitype(::Type{<:DeterministicConstantClassifier}) = MLJBase.Table(MLJBase.Scientific) # anything goes
158+
MLJBase.target_scitype(::Type{<:DeterministicConstantClassifier}) = AbstractVector{<:MLJBase.Finite}
167159

168-
end # module
169-
170-
171-
## EXPOSE THE INTERFACE
160+
end
172161

173-
using .Constant
162+
using MLJModels.Constant

0 commit comments

Comments
 (0)