Skip to content

Migration, standardizer and a few fixes #72

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Sep 27, 2019
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
MLJBase = "0.5"
MLJBase = "0.6"
julia = "1"

[extras]
Expand Down
3 changes: 1 addition & 2 deletions src/Clustering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
module Clustering_

import MLJBase
import MLJBase: @mlj_model
using ScientificTypes

import ..Clustering # strange syntax for lazy-loading
Expand All @@ -15,8 +16,6 @@ using LinearAlgebra: norm

const C = Clustering

import ..@mlj_model

const KMeansDescription =
"""
K-Means algorithm: find K centroids corresponding to K clusters in the data.
Expand Down
52 changes: 24 additions & 28 deletions src/MLJModels.jl
Original file line number Diff line number Diff line change
@@ -1,35 +1,25 @@
module MLJModels

using MLJBase, Tables, ScientificTypes
using Requires, Pkg.TOML, OrderedCollections
using StatsBase # countmap is required in metadata

# for administrators to update Metadata.toml:
export @update

# from builtins/Transformers.jl:
export StaticTransformer, FillImputer, FeatureSelector,
UnivariateStandardizer, Standardizer,
UnivariateBoxCoxTransformer,
OneHotEncoder

# from builtins/Constant.jl:
export ConstantRegressor, ConstantClassifier

# from builtins/KNN.jl:
export KNNRegressor

# from loading.jl:
export load, @load, info

# from model_search:
export models, localmodels

using Requires
using OrderedCollections
using MLJBase
using ScientificTypes
using Tables
using ColorTypes
using StatsBase
# from model/Constant
export ConstantRegressor, ConstantClassifier,
DeterministicConstantRegressor, DeterministicConstantClassifier

using Pkg.TOML
# from model/Transformers
export FeatureSelector, StaticTransformer, UnivariateStandardizer,
Standardizer, UnivariateBoxCoxTransformer, OneHotEncoder, FillImputer

const srcdir = dirname(@__FILE__) # the directory containing this file

Expand All @@ -40,25 +30,32 @@ if VERSION < v"1.3"
end
nonmissing = nonmissingtype


include("metadata_utils.jl")

include("metadata.jl")
include("model_search.jl")
include("loading.jl")
include("registry/src/Registry.jl")
import .Registry.@update

# load built-in models:
include("builtins/Transformers.jl")
include("builtins/Constant.jl")

include("parameters_utils.jl")
include("metadata_utils.jl")
include("builtins/Transformers.jl")

const INFO_GIVEN_HANDLE = Dict{Handle,Any}()
const PKGS_GIVEN_NAME = Dict{String,Vector{String}}()
const AMBIGUOUS_NAMES = String[]
const NAMES = String[]

# lazily load in strap-on model interfaces for external packages:
function __init__()
metadata_file = joinpath(srcdir, "registry", "Metadata.toml")

merge!(INFO_GIVEN_HANDLE, info_given_handle(metadata_file))
merge!(PKGS_GIVEN_NAME, pkgs_given_name(INFO_GIVEN_HANDLE))
append!(AMBIGUOUS_NAMES, ambiguous_names(INFO_GIVEN_HANDLE))
append!(NAMES, model_names(INFO_GIVEN_HANDLE))
@info "Model metadata loaded from registry. "

# lazily load in strap-on model interfaces for external packages:
@require MultivariateStats="6f286f6a-111f-5878-ab1e-185364afe411" include("MultivariateStats.jl")
@require DecisionTree="7806a523-6efd-50cb-b5f6-3fa6f1930dbb" include("DecisionTree.jl")
@require GaussianProcesses="891a1506-143c-57d2-908e-e1f8e92e6de9" include("GaussianProcesses.jl")
Expand All @@ -69,7 +66,6 @@ function __init__()
@require XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" include("XGBoost.jl")
@require LIBSVM="b1bec4e5-fd48-53fe-b0cb-9723c09d164b" include("LIBSVM.jl")
@require NearestNeighbors="b8a86587-4115-5ab1-83bc-aa920d37bbce" include("NearestNeighbors.jl")

end

end # module
3 changes: 1 addition & 2 deletions src/MultivariateStats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module MultivariateStats_
export RidgeRegressor, PCA, KernelPCA, ICA

import MLJBase
import MLJBase: @mlj_model
using ScientificTypes
using Tables

Expand All @@ -15,8 +16,6 @@ struct LinearFitresult{F} <: MLJBase.MLJType
bias::F
end

import ..@mlj_model

####
#### RIDGE
####
Expand Down
28 changes: 1 addition & 27 deletions src/NearestNeighbors.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
module NearestNeighbors_

import MLJBase
import MLJBase: @mlj_model, metadata_model, metadata_pkg
using Distances

import ..NearestNeighbors
import ..@mlj_model, ..metadata_pkg, ..metadata_model

const NN = NearestNeighbors

Expand Down Expand Up @@ -70,32 +70,6 @@ end

const KNN = Union{KNNRegressor, KNNClassifier}

"""
    MLJBase.clean!(m::KNN)

Validate the hyperparameters of a `KNNRegressor`/`KNNClassifier`, replacing any
invalid value with a sensible default in place. Returns a `String` accumulating
one warning line per corrected field (empty when all parameters were valid).
"""
function MLJBase.clean!(m::KNN)
    warning = ""
    # K is the number of neighbors; must be at least 1.
    if m.K < 1
        warning *= "Number of neighbors 'K' needs to be larger than 0. Setting to 1.\n"
        m.K = 1
    end
    # leafsize controls when tree construction switches to brute force; must be non-negative.
    if m.leafsize < 0
        warning *= "Leaf size should be ≥ 0. Setting to 10.\n"
        m.leafsize = 10
    end
    # Only the three NearestNeighbors tree backends are supported.
    if m.algorithm ∉ (:kdtree, :brutetree, :balltree)
        warning *= "The tree algorithm should be ':kdtree', ':brutetree' or ':balltree'." *
                   "Setting to ':kdtree'.\n"
        m.algorithm = :kdtree
    end
    # KDTree requires an axis-aligned (Minkowski-family) metric from Distances.jl.
    # (Fixed: original wrapped `∉` in `isa` — a malformed check — and misspelled `Cityblock`.)
    if m.algorithm == :kdtree &&
       !isa(m.metric, Union{Euclidean, Chebyshev, Minkowski, Cityblock})
        warning *= "KDTree only supports axis-aligned metrics. Setting to 'Euclidean'.\n"
        m.metric = Euclidean()
    end
    # Neighbor weighting scheme; default to :uniform as the warning message states.
    # (Fixed: original assigned :distance, contradicting its own warning text.)
    if m.weights ∉ (:uniform, :distance)
        warning *= "Weighing should be ':uniform' or ':distance'. Setting to ':uniform'.\n"
        m.weights = :uniform
    end
    return warning
end

function MLJBase.fit(m::KNN, verbosity::Int, X, y)
Xmatrix = MLJBase.matrix(X, transpose=true) # NOTE: copies the data
if m.algorithm == :kdtree
Expand Down
5 changes: 2 additions & 3 deletions src/ScikitLearn/ScikitLearn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module ScikitLearn_

#> for all Supervised models:
import MLJBase
import MLJBase: @mlj_model, metadata_model,
_process_model_def, _model_constructor, _model_cleaner
using ScientificTypes
using Tables

Expand All @@ -23,9 +25,6 @@ import ..ScikitLearn

include("svm.jl")

import .._process_model_def, .._model_constructor, .._model_cleaner
import ..metadata_model # metadata_pkg is handled by @sk_model

const Option{T} = Union{Nothing, T}

const SKLM = ((ScikitLearn.Skcore).pyimport("sklearn.linear_model"))
Expand Down
43 changes: 16 additions & 27 deletions src/builtins/Constant.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
# this file defines *and* loads one module

module Constant

export ConstantRegressor, ConstantClassifier
export DeterministicConstantRegressor, DeterministicConstantClassifier
using ..MLJBase, ..Tables
using Distributions

import MLJBase
import MLJBase.nrows
import Distributions
using StatsBase
using Statistics
using CategoricalArrays
using ScientificTypes
export ConstantRegressor, ConstantClassifier,
DeterministicConstantRegressor, DeterministicConstantClassifier

## THE CONSTANT REGRESSOR

Expand All @@ -23,7 +16,7 @@ probability distribution best fitting the training target data. Use
`predict_mean` to predict the mean value instead.

"""
struct ConstantRegressor{D} <: MLJBase.Probabilistic
struct ConstantRegressor{D} <: MLJBase.Probabilistic
distribution_type::Type{D}
end
function ConstantRegressor(; distribution_type=Distributions.Normal)
Expand Down Expand Up @@ -62,8 +55,8 @@ MLJBase.package_uuid(::Type{<:ConstantRegressor}) =
"d491faf4-2d78-11e9-2867-c94bc002c0b7"
MLJBase.package_url(::Type{<:ConstantRegressor}) = "https://github.com/alan-turing-institute/MLJModels.jl"
MLJBase.is_pure_julia(::Type{<:ConstantRegressor}) = true
MLJBase.input_scitype(::Type{<:ConstantRegressor}) = Table(Scientific) # anything goes
MLJBase.target_scitype(::Type{<:ConstantRegressor}) = AbstractVector{Continuous}
MLJBase.input_scitype(::Type{<:ConstantRegressor}) = MLJBase.Table(MLJBase.Scientific) # anything goes
MLJBase.target_scitype(::Type{<:ConstantRegressor}) = AbstractVector{MLJBase.Continuous}


## THE CONSTANT DETERMINISTIC REGRESSOR (FOR TESTING)
Expand All @@ -85,8 +78,8 @@ MLJBase.package_name(::Type{<:DeterministicConstantRegressor}) = MLJBase.package
MLJBase.package_uuid(::Type{<:DeterministicConstantRegressor}) = MLJBase.package_url(ConstantRegressor)
MLJBase.package_url(::Type{<:DeterministicConstantRegressor}) = MLJBase.package_url(ConstantRegressor)
MLJBase.is_pure_julia(::Type{<:DeterministicConstantRegressor}) = true
MLJBase.input_scitype(::Type{<:DeterministicConstantRegressor}) = Table(Scientific) # anything goes
MLJBase.target_scitype(::Type{<:DeterministicConstantRegressor}) = AbstractVector{Continuous}
MLJBase.input_scitype(::Type{<:DeterministicConstantRegressor}) = MLJBase.Table(MLJBase.Scientific) # anything goes
MLJBase.target_scitype(::Type{<:DeterministicConstantRegressor}) = AbstractVector{MLJBase.Continuous}


## THE CONSTANT CLASSIFIER
Expand All @@ -104,7 +97,7 @@ obtain the training target mode instead.
struct ConstantClassifier <: MLJBase.Probabilistic end

function MLJBase.fit(model::ConstantClassifier,
verbosity::Int, X, y)
verbosity::Int, X, y)

fitresult = Distributions.fit(MLJBase.UnivariateFinite, y)

Expand All @@ -127,8 +120,8 @@ MLJBase.package_name(::Type{<:ConstantClassifier}) = MLJBase.package_name(Consta
MLJBase.package_uuid(::Type{<:ConstantClassifier}) = MLJBase.package_uuid(ConstantRegressor)
MLJBase.package_url(::Type{<:ConstantClassifier}) = MLJBase.package_url(ConstantRegressor)
MLJBase.is_pure_julia(::Type{<:ConstantClassifier}) = true
MLJBase.input_scitype(::Type{<:ConstantClassifier}) = Table(Scientific) # anything goes
MLJBase.target_scitype(::Type{<:ConstantClassifier}) = AbstractVector{<:Finite}
MLJBase.input_scitype(::Type{<:ConstantClassifier}) = MLJBase.Table(MLJBase.Scientific) # anything goes
MLJBase.target_scitype(::Type{<:ConstantClassifier}) = AbstractVector{<:MLJBase.Finite}


## DETERMINISTIC CONSTANT CLASSIFIER (FOR TESTING)
Expand Down Expand Up @@ -161,13 +154,9 @@ MLJBase.package_name(::Type{<:DeterministicConstantClassifier}) = MLJBase.packag
MLJBase.package_uuid(::Type{<:DeterministicConstantClassifier}) = MLJBase.package_uuid(ConstantRegressor)
MLJBase.package_url(::Type{<:DeterministicConstantClassifier}) = MLJBase.package_url(ConstantRegressor)
MLJBase.is_pure_julia(::Type{<:DeterministicConstantClassifier}) = true
MLJBase.input_scitype(::Type{<:DeterministicConstantClassifier}) = Table(Scientific) # anything goes
MLJBase.target_scitype(::Type{<:DeterministicConstantClassifier}) = AbstractVector{<:Finite}

MLJBase.input_scitype(::Type{<:DeterministicConstantClassifier}) = MLJBase.Table(MLJBase.Scientific) # anything goes
MLJBase.target_scitype(::Type{<:DeterministicConstantClassifier}) = AbstractVector{<:MLJBase.Finite}

end # module


## EXPOSE THE INTERFACE
end

using .Constant
using MLJModels.Constant
Loading