-
Notifications
You must be signed in to change notification settings - Fork 0
ordinal regression model type & polr engine #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
86d7da5
e54998d
8282e50
ed6d921
c1b56df
c3c51f0
295d5a7
a5e29a3
83a364a
319ff8c
3bb3c4c
a414dd7
f115654
98b3a4c
8fc08fc
920dc2a
0f6ff0c
c965b4b
125fa7b
8e9c60b
e544802
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,27 @@ | ||
Package: ordered | ||
Title: Wrappers for Ordinal Classification Models | ||
Title: 'parsnip' Engines and Wrappers for Ordinal Classification Models | ||
Version: 0.0.0.9000 | ||
Authors@R: c( | ||
person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre"), | ||
comment = c(ORCID = "0000-0003-2402-136X")), | ||
person("Jason Cory", "Brunson", , "[email protected]", role = "aut", | ||
comment = c(ORCID = "0000-0003-3126-9494")), | ||
person("Posit Software PBC", role = "cph") | ||
) | ||
Description: Bindings for ordinal classification models for use with the | ||
'parsnip' package, such as ordinal random forests by Hornung R. (2020) | ||
<doi:10.1007/s00357-018-9302-x> and others. | ||
'parsnip' package, such as the proportional odds logistic regression | ||
implemented in 'MASS' and the ordinal random forests of Hornung (2020) | ||
<doi:10.1007/s00357-018-9302-x>. | ||
License: MIT + file LICENSE | ||
Depends: | ||
parsnip (>= 1.2.1.9003) | ||
Imports: | ||
cli, | ||
dplyr, | ||
rlang (>= 1.1.4) | ||
rlang (>= 1.1.4), | ||
tibble | ||
Suggests: | ||
MASS, | ||
ordinalForest, | ||
QSARdata, | ||
spelling, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# ordered (development version) | ||
|
||
* Initial CRAN submission. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,45 @@ | ||
#' {ordered}: parsnip Engines for Ordinal Regression Models | ||
#' | ||
#' {ordered} provides engines for ordinal regression models for the {parsnip} | ||
#' package. The models may have cumulative, sequential, or adjacent-category | ||
#' structure, and in future these may be disaggregated into separate model | ||
#' types. A vignette will provide thorough illustrations of {ordered} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a vignette 😄 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yet! Should we require a vignette prior to initial submission? |
||
#' functionality. See below for examples of fitting ordinal regression models | ||
#' with {ordered}. | ||
#' | ||
#' @examples | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same things related to "examplesif" and |
||
#' if (rlang::is_installed("MASS")) { | ||
#' | ||
#' # Weighted sample | ||
#' | ||
#' set.seed(561246) | ||
#' house_sub <- MASS::housing |> | ||
#' dplyr::sample_n(size = 120, replace = TRUE, weight = Freq) |> | ||
#' subset(select = -Freq) | ||
#' train_inds <- sample(120, 80) | ||
#' house_train <- house_sub[train_inds, ] | ||
#' house_test <- house_sub[-train_inds, ] | ||
#' | ||
#' # Cumulative-link proportional-odds probit regression model | ||
#' | ||
#' fit_cpop <- ordinal_reg() |> | ||
#' set_engine("polr") |> | ||
#' set_args(method = "probit") |> | ||
#' fit(Sat ~ Infl + Type + Cont, data = house_train) | ||
#' predict(fit_cpop, house_test, type = "prob") | ||
#' | ||
#' if (rlang::is_installed("ordinalForest")) { | ||
#' | ||
#' # Ordinal forest | ||
#' | ||
#' fit_orf <- rand_forest(mode = "classification") |> | ||
#' set_engine("ordinalForest") |> | ||
#' fit(Sat ~ Infl + Type + Cont, data = house_train) | ||
#' predict(fit_orf, house_test, type = "prob") | ||
#' | ||
#' } | ||
#' } | ||
#' | ||
#' @keywords internal | ||
"_PACKAGE" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# These functions define the ordinal regression models. | ||
# They are executed when this package is loaded via `.onLoad()` | ||
# and modify the {parsnip} package's model environment. | ||
|
||
# These functions are tested indirectly whenever the models are used. | ||
# Because they run at package load to add entries to the parsnip model | ||
# database, they cannot be exercised directly by tests and are therefore | ||
# excluded from coverage statistics. | ||
|
||
# nocov start | ||
|
||
# Register the "polr" engine for the `ordinal_reg` model type in the {parsnip}
# model database. The registration covers, in order: the engine itself, its
# package dependencies, the fit interface (`MASS::polr()` via a formula), the
# predictor encoding, and the "class" and "prob" prediction modules.
#
# Takes no arguments and is called for its side effects on the {parsnip}
# model environment.
make_ordinal_reg_polr <- function() {

  parsnip::set_model_engine("ordinal_reg", "classification", "polr")

  # The fit function lives in {MASS}, so it must be declared as a dependency
  # alongside {ordered}; otherwise {parsnip} cannot report that {MASS} is
  # required (or missing) for this engine.
  parsnip::set_dependency(
    "ordinal_reg",
    eng = "polr",
    pkg = "MASS",
    mode = "classification"
  )
  parsnip::set_dependency(
    "ordinal_reg",
    eng = "polr",
    pkg = "ordered",
    mode = "classification"
  )

  parsnip::set_fit(
    model = "ordinal_reg",
    eng = "polr",
    mode = "classification",
    value = list(
      interface = "formula",
      # arguments that users may not override through `set_engine()`
      protect = c("formula", "data", "weights"),
      func = c(pkg = "MASS", fun = "polr"),
      defaults = list(
        method = "logistic"
      )
    )
  )

  parsnip::set_encoding(
    model = "ordinal_reg",
    eng = "polr",
    mode = "classification",
    options = list(
      predictor_indicators = "traditional",
      compute_intercept = TRUE,
      remove_intercept = FALSE,
      allow_sparse_x = FALSE
    )
  )

  # Class predictions.
  # NOTE(review): `MASS:::predict.polr()` returns an unordered factor; this
  # module relies on {parsnip} to restore the ordered levels of the outcome.
  # Confirm against the minimum {parsnip} version before adding a `post` hook.
  parsnip::set_pred(
    model = "ordinal_reg",
    eng = "polr",
    mode = "classification",
    type = "class",
    value = list(
      pre = NULL,
      post = NULL,
      func = c(fun = "predict"),
      args = list(
        object = quote(object$fit),
        newdata = quote(new_data),
        type = "class"
      )
    )
  )

  # Class-probability predictions.
  parsnip::set_pred(
    model = "ordinal_reg",
    eng = "polr",
    mode = "classification",
    type = "prob",
    value = list(
      pre = NULL,
      post = function(x, object) {
        # `MASS:::predict.polr()` drops dimensions, so a single row of
        # `new_data` comes back as a named vector rather than a matrix.
        # Restore the one-row matrix so that each outcome level stays in its
        # own column instead of being stacked into a single `value` column.
        if (is.null(dim(x))) {
          x <- matrix(x, nrow = 1L, dimnames = list(NULL, names(x)))
        }
        tibble::as_tibble(x)
      },
      func = c(fun = "predict"),
      args = list(
        object = quote(object$fit),
        newdata = quote(new_data),
        type = "probs"
      )
    )
  )

}
|
||
# nocov end |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Test model type and engine arguments here rather than in {parsnip} if they | ||
# require engines to be loaded. | ||
|
||
test_that("check_args() works", {
  # Placeholder kept for completeness: no argument checking is done for this
  # model type, so the only expectation is that the test runs.
  skip_if_not_installed("parsnip", minimum_version = "1.2.1.9003")

  expect_true(TRUE)
})
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
|
||
# model: basic ----------------------------------------------------------------- | ||
|
||
test_that("model object", {
  skip_if_not_installed("MASS")
  house_sub <- get_house()$sub

  # Fit through {parsnip} first, then directly with the engine function.
  polr_spec <- set_mode(set_engine(ordinal_reg(), "polr"), "classification")
  wrapped_fit <- fit(polr_spec, Sat ~ Type + Infl + Cont, data = house_sub)

  direct_fit <- MASS::polr(
    Sat ~ Type + Infl + Cont,
    data = house_sub,
    model = TRUE
  )

  # The recorded calls necessarily differ, so drop them before comparing.
  engine_fit <- wrapped_fit$fit
  engine_fit$call <- NULL
  direct_fit$call <- NULL

  expect_equal(
    direct_fit,
    engine_fit,
    ignore_formula_env = TRUE
  )
})
|
||
# model: case weights ---------------------------------------------------------- | ||
|
||
test_that("case weights", {
  skip_if_not_installed("MASS")
  house_data <- get_house()$data

  # Reference fit: the engine function with frequency weights.
  direct_fit <- MASS::polr(
    Sat ~ Type + Infl + Cont,
    data = house_data,
    weights = Freq,
    model = TRUE
  )

  # Same model through {parsnip}, with the counts wrapped as case weights.
  weighted_data <- transform(house_data, Freq = frequency_weights(Freq))
  polr_spec <- set_mode(set_engine(ordinal_reg(), "polr"), "classification")
  wrapped_fit <- fit(
    polr_spec,
    Sat ~ Type + Infl + Cont,
    data = weighted_data,
    case_weights = weighted_data$Freq
  )

  # The recorded calls necessarily differ, so drop them before comparing.
  engine_fit <- wrapped_fit$fit
  engine_fit$call <- NULL
  direct_fit$call <- NULL

  expect_equal(
    direct_fit,
    engine_fit,
    ignore_formula_env = TRUE
  )
})
|
||
# prediction: probability ------------------------------------------------------ | ||
|
||
test_that("probability prediction", {
  skip_if_not_installed("MASS")
  house_sub <- get_house()$sub

  polr_spec <- set_engine(ordinal_reg(), "polr")
  wrapped_fit <- fit(polr_spec, Sat ~ Type + Cont, data = house_sub)

  # Expected result: engine probabilities as a tibble with `.pred_` names.
  expected <- tibble::as_tibble(
    predict(wrapped_fit$fit, newdata = house_sub, type = "probs")
  )
  expected <- set_names(expected, paste0(".pred_", names(expected)))

  observed <- predict(wrapped_fit, house_sub, type = "prob")
  expect_equal(expected, observed)
})
|
||
# prediction: class ------------------------------------------------------------ | ||
|
||
test_that("class prediction", {
  skip_if_not_installed("MASS")
  house_sub <- get_house()$sub

  polr_spec <- set_engine(ordinal_reg(), "polr")
  wrapped_fit <- fit(polr_spec, Sat ~ Infl + Cont, data = house_sub)

  engine_class <- predict(wrapped_fit$fit, house_sub)
  # NB: `MASS:::predict.polr()` strips order from `object$model$<outcome>`,
  # so rebuild the ordered factor (without names) before comparing.
  expected <- tibble::tibble(
    .pred_class = ordered(unname(engine_class), levels(engine_class))
  )

  observed <- predict(wrapped_fit, house_sub)
  expect_equal(expected, observed)
})
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can remove this since it is yours.
I'd also run
usethis::use_tidy_description()
to reorganize this file.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@topepo i'm not sure what you mean by "remove this".
Also, should i mark myself
"cre"
for the time being or wait until someone is ready to submit to CRAN?