diff --git a/DESCRIPTION b/DESCRIPTION index bbc018a48..2e07bb2e0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ Imports: ggplot2, globals, glue, - hardhat (>= 1.4.0.9002), + hardhat (>= 1.4.0.9003), lifecycle, magrittr, pillar, diff --git a/NAMESPACE b/NAMESPACE index 55c9c236f..67ce6a2b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -377,6 +377,7 @@ importFrom(generics,tune_args) importFrom(generics,varying_args) importFrom(ggplot2,autoplot) importFrom(glue,glue_collapse) +importFrom(hardhat,contr_one_hot) importFrom(hardhat,extract_fit_engine) importFrom(hardhat,extract_fit_time) importFrom(hardhat,extract_parameter_dials) diff --git a/R/contr_one_hot.R b/R/contr_one_hot.R deleted file mode 100644 index 00cdb3484..000000000 --- a/R/contr_one_hot.R +++ /dev/null @@ -1,52 +0,0 @@ -#' Contrast function for one-hot encodings -#' -#' This contrast function produces a model matrix with indicator columns for -#' each level of each factor. -#' -#' @param n A vector of character factor levels (of length >=1) or the number -#' of unique levels (>= 1). -#' @param contrasts This argument is for backwards compatibility and only the -#' default of `TRUE` is supported. -#' @param sparse This argument is for backwards compatibility and only the -#' default of `FALSE` is supported. -#' -#' @includeRmd man/rmd/one-hot.md details -#' -#' @return A diagonal matrix that is `n`-by-`n`. -#' -#' @export -contr_one_hot <- function(n, contrasts = TRUE, sparse = FALSE) { - if (sparse) { - cli::cli_warn("{.code sparse = TRUE} not implemented for {.fun contr_one_hot}.") - } - - if (!contrasts) { - cli::cli_warn("{.code contrasts = FALSE} not implemented for {.fun contr_one_hot}.") - } - - if (is.character(n)) { - if (length(n) < 1) { - cli::cli_abort("{.arg n} cannot be empty.") - } - names <- n - n <- length(names) - } else if (is.numeric(n)) { - check_number_whole(n, min = 1) - n <- as.integer(n) - - if (length(n) != 1L) { - cli::cli_abort("{.arg n} must have length 1 when an integer is provided.") - } - - names <- as.character(seq_len(n)) - } else { - check_number_whole(n, min = 1) - } - - out <- diag(n) - - rownames(out) <- names - colnames(out) <- names - - out -} diff --git a/R/reexports.R b/R/reexports.R index 2760c2a14..8aacdf12e 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -30,6 +30,10 @@ generics::augment #' @export generics::required_pkgs +#' @importFrom hardhat contr_one_hot +#' @export +hardhat::contr_one_hot + #' @importFrom hardhat extract_spec_parsnip #' @export hardhat::extract_spec_parsnip diff --git a/man/contr_one_hot.Rd b/man/contr_one_hot.Rd deleted file mode 100644 index 57ff6654c..000000000 --- a/man/contr_one_hot.Rd +++ /dev/null @@ -1,93 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/contr_one_hot.R -\name{contr_one_hot} -\alias{contr_one_hot} -\title{Contrast function for one-hot encodings} -\usage{ -contr_one_hot(n, contrasts = TRUE, sparse = FALSE) -} -\arguments{ -\item{n}{A vector of character factor levels (of length >=1) or the number -of unique levels (>= 1).} - -\item{contrasts}{This argument is for backwards compatibility and only the -default of \code{TRUE} is supported.} - -\item{sparse}{This argument is for backwards compatibility and only the -default of \code{FALSE} is supported.} -} -\value{ -A diagonal matrix that is \code{n}-by-\code{n}. -} -\description{ -This contrast function produces a model matrix with indicator columns for -each level of each factor. -} -\details{ -By default, \code{model.matrix()} generates binary indicator variables for -factor predictors. When the formula does not remove an intercept, an -incomplete set of indicators are created; no indicator is made for the -first level of the factor. - -For example, \code{species} and \code{island} both have three levels but -\code{model.matrix()} creates two indicator variables for each: - -\if{html}{\out{
}}\preformatted{library(dplyr) -library(modeldata) -data(penguins) - -levels(penguins$species) -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{## [1] "Adelie" "Chinstrap" "Gentoo" -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{levels(penguins$island) -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{## [1] "Biscoe" "Dream" "Torgersen" -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{model.matrix(~ species + island, data = penguins) \%>\% - colnames() -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" -## [5] "islandTorgersen" -}\if{html}{\out{
}} - -For a formula with no intercept, the first factor is expanded to -indicators for \emph{all} factor levels but all other factors are expanded to -all but one (as above): - -\if{html}{\out{
}}\preformatted{model.matrix(~ 0 + species + island, data = penguins) \%>\% - colnames() -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" -## [5] "islandTorgersen" -}\if{html}{\out{
}} - -For inference, this hybrid encoding can be problematic. - -To generate all indicators, use this contrast: - -\if{html}{\out{
}}\preformatted{# Switch out the contrast method -old_contr <- options("contrasts")$contrasts -new_contr <- old_contr -new_contr["unordered"] <- "contr_one_hot" -options(contrasts = new_contr) - -model.matrix(~ species + island, data = penguins) \%>\% - colnames() -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" -## [5] "islandBiscoe" "islandDream" "islandTorgersen" -}\if{html}{\out{
}} - -\if{html}{\out{
}}\preformatted{options(contrasts = old_contr) -}\if{html}{\out{
}} - -Removing the intercept here does not affect the factor encodings. -} diff --git a/man/reexports.Rd b/man/reexports.Rd index f87bde459..18691a919 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -11,6 +11,7 @@ \alias{glance} \alias{augment} \alias{required_pkgs} +\alias{contr_one_hot} \alias{extract_spec_parsnip} \alias{extract_fit_engine} \alias{extract_parameter_set_dials} @@ -31,7 +32,7 @@ below to see their documentation. \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} - \item{hardhat}{\code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_fit_time}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} + \item{hardhat}{\code{\link[hardhat]{contr_one_hot}}, \code{\link[hardhat:hardhat-extract]{extract_fit_engine}}, \code{\link[hardhat:hardhat-extract]{extract_fit_time}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_dials}}, \code{\link[hardhat:hardhat-extract]{extract_parameter_set_dials}}, \code{\link[hardhat:hardhat-extract]{extract_spec_parsnip}}, \code{\link[hardhat]{frequency_weights}}, \code{\link[hardhat]{importance_weights}}, \code{\link[hardhat]{tune}}} \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} }} diff --git a/man/rmd/one-hot.Rmd b/man/rmd/one-hot.Rmd deleted file mode 100644 index baf0b5f4d..000000000 --- a/man/rmd/one-hot.Rmd +++ /dev/null @@ -1,47 +0,0 @@ -```{r load, include = FALSE} -library(dplyr) -``` - -By default, `model.matrix()` generates binary indicator variables for factor predictors. When the formula does not remove an intercept, an incomplete set of indicators are created; no indicator is made for the first level of the factor. - -For example, `species` and `island` both have three levels but `model.matrix()` creates two indicator variables for each: - -```{r ref-cell} -library(dplyr) -library(modeldata) -data(penguins) - -levels(penguins$species) -levels(penguins$island) - -model.matrix(~ species + island, data = penguins) %>% - colnames() -``` - -For a formula with no intercept, the first factor is expanded to indicators for _all_ factor levels but all other factors are expanded to all but one (as above): - -```{r hybrid} -model.matrix(~ 0 + species + island, data = penguins) %>% - colnames() -``` - -For inference, this hybrid encoding can be problematic. - -To generate all indicators, use this contrast: - -```{r one-hot} -# Switch out the contrast method -old_contr <- options("contrasts")$contrasts -new_contr <- old_contr -new_contr["unordered"] <- "contr_one_hot" -options(contrasts = new_contr) - -model.matrix(~ species + island, data = penguins) %>% - colnames() - -options(contrasts = old_contr) -``` - -Removing the intercept here does not affect the factor encodings. - - diff --git a/man/rmd/one-hot.md b/man/rmd/one-hot.md deleted file mode 100644 index b05a49315..000000000 --- a/man/rmd/one-hot.md +++ /dev/null @@ -1,78 +0,0 @@ - - -By default, `model.matrix()` generates binary indicator variables for factor predictors. When the formula does not remove an intercept, an incomplete set of indicators are created; no indicator is made for the first level of the factor. - -For example, `species` and `island` both have three levels but `model.matrix()` creates two indicator variables for each: - - -``` r -library(dplyr) -library(modeldata) -data(penguins) - -levels(penguins$species) -``` - -``` -## [1] "Adelie" "Chinstrap" "Gentoo" -``` - -``` r -levels(penguins$island) -``` - -``` -## [1] "Biscoe" "Dream" "Torgersen" -``` - -``` r -model.matrix(~ species + island, data = penguins) %>% - colnames() -``` - -``` -## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" -## [5] "islandTorgersen" -``` - -For a formula with no intercept, the first factor is expanded to indicators for _all_ factor levels but all other factors are expanded to all but one (as above): - - -``` r -model.matrix(~ 0 + species + island, data = penguins) %>% - colnames() -``` - -``` -## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" -## [5] "islandTorgersen" -``` - -For inference, this hybrid encoding can be problematic. - -To generate all indicators, use this contrast: - - -``` r -# Switch out the contrast method -old_contr <- options("contrasts")$contrasts -new_contr <- old_contr -new_contr["unordered"] <- "contr_one_hot" -options(contrasts = new_contr) - -model.matrix(~ species + island, data = penguins) %>% - colnames() -``` - -``` -## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" -## [5] "islandBiscoe" "islandDream" "islandTorgersen" -``` - -``` r -options(contrasts = old_contr) -``` - -Removing the intercept here does not affect the factor encodings. - - diff --git a/tests/testthat/_snaps/contr_one_hot.md b/tests/testthat/_snaps/contr_one_hot.md deleted file mode 100644 index f677fd5a6..000000000 --- a/tests/testthat/_snaps/contr_one_hot.md +++ /dev/null @@ -1,48 +0,0 @@ -# one-hot encoding contrasts - - Code - contr_one_hot(character(0)) - Condition - Error in `contr_one_hot()`: - ! `n` cannot be empty. - ---- - - Code - contr_one_hot(-1) - Condition - Error in `contr_one_hot()`: - ! `n` must be a whole number larger than or equal to 1, not the number -1. - ---- - - Code - contr_one_hot(list()) - Condition - Error in `contr_one_hot()`: - ! `n` must be a whole number, not an empty list. - ---- - - Code - contr_one_hot(2, contrast = FALSE) - Condition - Warning: - `contrasts = FALSE` not implemented for `contr_one_hot()`. - Output - 1 2 - 1 1 0 - 2 0 1 - ---- - - Code - contr_one_hot(2, sparse = TRUE) - Condition - Warning: - `sparse = TRUE` not implemented for `contr_one_hot()`. - Output - 1 2 - 1 1 0 - 2 0 1 - diff --git a/tests/testthat/test-contr_one_hot.R b/tests/testthat/test-contr_one_hot.R deleted file mode 100644 index 3f1d78857..000000000 --- a/tests/testthat/test-contr_one_hot.R +++ /dev/null @@ -1,19 +0,0 @@ -test_that('one-hot encoding contrasts', { - contr_mat <- contr_one_hot(12) - expect_equal(colnames(contr_mat), paste(1:12)) - expect_equal(rownames(contr_mat), paste(1:12)) - expect_true(all(apply(contr_mat, 1, sum) == 1)) - expect_true(all(apply(contr_mat, 2, sum) == 1)) - - chr_contr_mat <- contr_one_hot(letters[1:12]) - expect_equal(colnames(chr_contr_mat), letters[1:12]) - expect_equal(rownames(chr_contr_mat), letters[1:12]) - expect_true(all(apply(chr_contr_mat, 1, sum) == 1)) - expect_true(all(apply(chr_contr_mat, 2, sum) == 1)) - - expect_snapshot(contr_one_hot(character(0)), error = TRUE) - expect_snapshot(contr_one_hot(-1), error = TRUE) - expect_snapshot(contr_one_hot(list()), error = TRUE) - expect_snapshot(contr_one_hot(2, contrast = FALSE)) - expect_snapshot(contr_one_hot(2, sparse = TRUE)) -})