From 3882350fd8a77980301e8ce4dc676017713c601f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 5 Mar 2025 23:44:08 +0000 Subject: [PATCH 01/28] use maybe_write_content for easier 'mocking' --- R/expect_lint.R | 21 +++++++++++--------- tests/testthat/test-get_source_expressions.R | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/expect_lint.R b/R/expect_lint.R index c8e81cafa..30e377d7c 100644 --- a/R/expect_lint.R +++ b/R/expect_lint.R @@ -47,15 +47,7 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") { old_lang <- set_lang(language) on.exit(reset_lang(old_lang)) - if (is.null(file)) { - file <- tempfile() - on.exit(unlink(file), add = TRUE) - local({ - con <- base::file(file, encoding = "UTF-8") - on.exit(close(con)) - writeLines(content, con = con, sep = "\n") - }) - } + file <- maybe_write_content(file, content) lints <- lint(file, ...) n_lints <- length(lints) @@ -121,6 +113,17 @@ expect_no_lint <- function(content, ..., file = NULL, language = "en") { expect_lint(content, NULL, ..., file = file, language = language) } +maybe_write_content <- function(file, lines) { + if (!is.null(file)) { + return(file) + } + tmp <- tempfile() + con <- file(tmp, encoding = "UTF-8") + on.exit(close(con)) + writeLines(lines, con = con, sep = "\n") + tmp +} + #' Test that the package is lint free #' #' This function is a thin wrapper around lint_package that simply tests there are no diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R index eeaff905b..64dbd548d 100644 --- a/tests/testthat/test-get_source_expressions.R +++ b/tests/testthat/test-get_source_expressions.R @@ -105,6 +105,7 @@ test_that("Multi-byte character truncated by parser is ignored", { }) test_that("Can read non UTF-8 file", { + withr::local_options(list(lintr.linter_file = tempfile())) proj_dir <- test_path("dummy_projects", "project") withr::local_dir(proj_dir) expect_no_lint(file = "cp1252.R", linters = list()) From c392c535066ea661051956aaccd9e09e44596f7f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 01:00:16 +0000 Subject: [PATCH 02/28] initial progress --- .dev/ast_fuzz_test.R | 43 +++++++++++++++++++++++++++++++++ .dev/maybe_fuzz_content.R | 50 +++++++++++++++++++++++++++++++++++++++ R/expect_lint.R | 48 ++++++++++++++++++++++++++++++++++++- 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 .dev/ast_fuzz_test.R create mode 100644 .dev/maybe_fuzz_content.R diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R new file mode 100644 index 000000000..cd330d210 --- /dev/null +++ b/.dev/ast_fuzz_test.R @@ -0,0 +1,43 @@ +# Fuzz testing for lint consistency +# +# We have often encountered issues where we handle +# equivalent R constructs inconsistently, e.g., +# function(...) should almost always match the same +# rules as \(...), and '<-' assignment should almost +# always be equivalent to '='. +# +# Here, we seek to enforce that (under eventual consistency) +# by randomly altering the contents of files encountered +# under expect_lint() to swap known equivalencies. + +expect_lint_file <- "R/expect_lint.R" + +original <- readLines(expect_lint_file) +expected_line <- "file <- maybe_write_content(file, content)" +expected_line_idx <- grep(expected_line, original, fixed = TRUE) +if (length(expected_line_idx) != 1L) { + stop(sprintf( + "Please update this workflow -- need exactly one hit for line '%s' in file '%s'.", + expected_line, expect_lint_file + )) +} +writeLines( + c( + head(original, expected_line_idx-1L), + # overwrite original exit hook to always delete the fuzzed file + "on.exit({reset_lang(old_lang); unlink(file)})", + "file <- maybe_fuzz_content(file, content)", + tail(original, -expected_line_idx), + readLines(".dev/maybe_fuzz_content.R") + ), + expect_lint_file +) +# Not useful in CI but good when running locally. +withr::defer({ + writeLines(original, expect_lint_file) + pkgload::load_all() +}) + +pkgload::load_all() + +testthat::test_dir("tests") diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R new file mode 100644 index 000000000..fffce7d1f --- /dev/null +++ b/.dev/maybe_fuzz_content.R @@ -0,0 +1,50 @@ +maybe_fuzz_content <- function(file, lines) { + new_file <- tempfile() + if (is.null(file)) { + con <- file(new_file, encoding = "UTF-8") + writeLines(lines, con = con, sep = "\n") + close(con) + } else { + file.copy(file, new_file, copy.mode = FALSE) + } + + fuzz_contents(new_file) + + new_file +} + +fuzz_contents <- function(f) { + pd <- getParseData(parse(f, keep.source = TRUE)) + + fun_tokens <- c("'\\\\'", "FUNCTION") + fun_idx <- which(pd$token %in% fun_tokens) + n_fun <- length(fun_idx) + + if (n_fun == 0L) { + return(invisible()) + } + + pd$new_token[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE) + + l <- readLines(f) + + replacement_map <- c(FUNCTION = " \\", `'\\\\'` = "function") + for (ii in rev(fun_idx)) { + if (pd$token[ii] == pd$new_token[ii]) next + browser() + ptn = rex::rex( + start, + capture(n_times(anything, pd$col1[ii] - 1L), name = "prefix"), + pd$text[ii] + ) + l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, replacement_map[pd$token[ii]]) + } + + + start <- pd$col1[fun_idx] + substr(l[pd$line1[fun_idx]], start, start + nchar("function") - 1L) <- replacement_map[pd$token[fun_idx]] + + writeLines(l, f) + + invisible() +} diff --git a/R/expect_lint.R b/R/expect_lint.R index 30e377d7c..5d321354a 100644 --- a/R/expect_lint.R +++ b/R/expect_lint.R @@ -47,7 +47,9 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") { old_lang <- set_lang(language) on.exit(reset_lang(old_lang)) - file <- maybe_write_content(file, content) + if (is.null(file)) on.exit(unlink(file), add = TRUE) +on.exit({reset_lang(old_lang); unlink(file)}) +file <- maybe_fuzz_content(file, content) lints <- lint(file, ...) n_lints <- length(lints) @@ -165,3 +167,47 @@ require_testthat <- function() { ) } } +maybe_fuzz_content <- function(file, lines) { + new_file <- tempfile() + if (is.null(file)) { + con <- file(new_file, encoding = "UTF-8") + writeLines(lines, con = con, sep = "\n") + close(con) + } else { + file.copy(file, new_file, copy.mode = FALSE) + } + + fuzz_contents(new_file) + + new_file +} + +fuzz_contents <- function(f) { + pd <- getParseData(parse(f, keep.source = TRUE)) + + fun_tokens <- c("'\\\\'", "FUNCTION") + fun_idx <- which(pd$token %in% fun_tokens) + n_fun <- length(fun_idx) + + if (n_fun == 0L) { + return(invisible()) + } + + pd$new_token[fun_idx] <- sample(fun_tokens, length(fun_idx), replace = TRUE) + + l <- readLines(f) + + for (ii in rev(fun_idx)) { + if (pd$token[ii] == pd$new_token[ii]) next + browser() + ptn = paste0("^(.{", pd$col1 - 1L, "})") + } + + replacement_map <- c(FUNCTION = " \\", `'\\\\'` = "function") + start <- pd$col1[fun_idx] + substr(l[pd$line1[fun_idx]], start, start + nchar("function") - 1L) <- replacement_map[pd$token[fun_idx]] + + writeLines(l, f) + + invisible() +} From 5cef281b62b7a6ba61466b10a9ec9cf3bf58e430 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 17:35:37 +0000 Subject: [PATCH 03/28] getting very close i think... --- .dev/ast_fuzz_test.R | 4 ++-- .dev/maybe_fuzz_content.R | 10 +++------- R/expect_lint.R | 3 +-- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index cd330d210..46be6a27c 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -25,8 +25,8 @@ writeLines( c( head(original, expected_line_idx-1L), # overwrite original exit hook to always delete the fuzzed file - "on.exit({reset_lang(old_lang); unlink(file)})", - "file <- maybe_fuzz_content(file, content)", + " on.exit({reset_lang(old_lang); unlink(file)})", + " file <- maybe_fuzz_content(file, content)", tail(original, -expected_line_idx), readLines(".dev/maybe_fuzz_content.R") ), diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index fffce7d1f..b5af3cb09 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -24,26 +24,22 @@ fuzz_contents <- function(f) { return(invisible()) } + pd$new_token <- NA_character_ pd$new_token[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE) l <- readLines(f) - replacement_map <- c(FUNCTION = " \\", `'\\\\'` = "function") + replacement_map <- c(FUNCTION = "\\", `'\\\\'` = "function") for (ii in rev(fun_idx)) { if (pd$token[ii] == pd$new_token[ii]) next - browser() ptn = rex::rex( start, capture(n_times(anything, pd$col1[ii] - 1L), name = "prefix"), pd$text[ii] ) - l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, replacement_map[pd$token[ii]]) + l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, paste0("\\1", rex::rex(replacement_map[pd$token[ii]]))) } - - start <- pd$col1[fun_idx] - substr(l[pd$line1[fun_idx]], start, start + nchar("function") - 1L) <- replacement_map[pd$token[fun_idx]] - writeLines(l, f) invisible() diff --git a/R/expect_lint.R b/R/expect_lint.R index 5d321354a..811dda0ea 100644 --- a/R/expect_lint.R +++ b/R/expect_lint.R @@ -48,8 +48,7 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") { on.exit(reset_lang(old_lang)) if (is.null(file)) on.exit(unlink(file), add = TRUE) -on.exit({reset_lang(old_lang); unlink(file)}) -file <- maybe_fuzz_content(file, content) + file <- maybe_write_content(file, content) lints <- lint(file, ...) n_lints <- length(lints) From a4e4a66f36a7bbe4d8db6fab56d90f58ef9449b2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 17:41:43 +0000 Subject: [PATCH 04/28] skip Rmd files --- .dev/maybe_fuzz_content.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index b5af3cb09..3f2969f66 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -14,7 +14,11 @@ maybe_fuzz_content <- function(file, lines) { } fuzz_contents <- function(f) { - pd <- getParseData(parse(f, keep.source = TRUE)) + pd <- tryCatch(getParseData(parse(f, keep.source = TRUE)), error = identity) + # e.g. Rmd files. We could use get_source_expressions(), but with little benefit & much slower. + if (inherits(pd, "error")) { + return(invisible()) + } fun_tokens <- c("'\\\\'", "FUNCTION") fun_idx <- which(pd$token %in% fun_tokens) From 0b1eaf5e5984928ded4fc4ac44357c9d12b14b4e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 17:43:06 +0000 Subject: [PATCH 05/28] caught a live one! --- R/terminal_close_linter.R | 2 +- tests/testthat/test-terminal_close_linter.R | 36 +++++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/R/terminal_close_linter.R b/R/terminal_close_linter.R index 20b86ac64..7c4a44285 100644 --- a/R/terminal_close_linter.R +++ b/R/terminal_close_linter.R @@ -39,7 +39,7 @@ #' @export terminal_close_linter <- make_linter_from_xpath( xpath = " - //FUNCTION + (//FUNCTION | //OP-LAMBDA) /following-sibling::expr /expr[last()][ expr/SYMBOL_FUNCTION_CALL[text() = 'close'] diff --git a/tests/testthat/test-terminal_close_linter.R b/tests/testthat/test-terminal_close_linter.R index 697804f68..b20149005 100644 --- a/tests/testthat/test-terminal_close_linter.R +++ b/tests/testthat/test-terminal_close_linter.R @@ -9,7 +9,7 @@ test_that("terminal_close_linter skips allowed cases", { return(invisible()) } ") - expect_lint(lines, NULL, linter) + expect_no_lint(lines, linter) lines <- trim_some(" foo <- function(bar) { @@ -17,7 +17,15 @@ test_that("terminal_close_linter skips allowed cases", { return(close) } ") - expect_lint(lines, NULL, linter) + expect_no_lint(lines, linter) + + lines <- trim_some(" + foo <- \\(bar) { + close <- bar + 1 + return(close) + } + ") + expect_no_lint(lines, linter) lines <- trim_some(" foo <- function(bar) { @@ -25,7 +33,7 @@ test_that("terminal_close_linter skips allowed cases", { close } ") - expect_lint(lines, NULL, linter) + expect_no_lint(lines, linter) }) test_that("terminal_close_linter blocks simple cases", { @@ -72,3 +80,25 @@ test_that("terminal_close_linter blocks simple cases", { linter ) }) + +test_that("lints vectorize", { + expect_lint( + trim_some("{ + foo <- function() { + tmp <- file(tempfile()) + writeLines(letters, tmp) + close(tmp) + } + bar <- \\() { + tmp <- file(tempfile()) + writeLines(letters, tmp) + close(tmp) + } + }"), + list( + list("close connections", line_number = 5L), + list("close connections", line_number = 10L) + ), + terminal_close_linter() + ) +}) From 868ad3079a4b287b3e88701d99b8410fc3403d12 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 17:55:25 +0000 Subject: [PATCH 06/28] need to match original file extension? --- .dev/maybe_fuzz_content.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index 3f2969f66..60bb6588c 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -1,10 +1,11 @@ maybe_fuzz_content <- function(file, lines) { - new_file <- tempfile() if (is.null(file)) { + new_file <- tempfile() con <- file(new_file, encoding = "UTF-8") writeLines(lines, con = con, sep = "\n") close(con) } else { + new_file <- tempfile(fileext = tools::file_ext(file)) file.copy(file, new_file, copy.mode = FALSE) } From 0ed5cc042506832b19587161899e9d80aea9c988 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 17:58:44 +0000 Subject: [PATCH 07/28] caught another one! --- R/library_call_linter.R | 4 ++-- tests/testthat/test-library_call_linter.R | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/library_call_linter.R b/R/library_call_linter.R index fb5c20e80..b87297d8e 100644 --- a/R/library_call_linter.R +++ b/R/library_call_linter.R @@ -111,7 +111,7 @@ library_call_linter <- function(allow_preamble = TRUE) { expr[2][STR_CONST] or ( SYMBOL_SUB[text() = 'character.only'] - and not(ancestor::expr[FUNCTION]) + and not(ancestor::expr[FUNCTION or OP-LAMBDA]) ) ] ") @@ -122,7 +122,7 @@ library_call_linter <- function(allow_preamble = TRUE) { //SYMBOL_FUNCTION_CALL[{ xp_text_in_table(bad_indirect_funs) }] /parent::expr /parent::expr[ - not(ancestor::expr[FUNCTION]) + not(ancestor::expr[FUNCTION or OP-LAMBDA]) and expr[{ call_symbol_cond }] ] ") diff --git a/tests/testthat/test-library_call_linter.R b/tests/testthat/test-library_call_linter.R index fc5b53367..f43790b36 100644 --- a/tests/testthat/test-library_call_linter.R +++ b/tests/testthat/test-library_call_linter.R @@ -220,7 +220,9 @@ test_that("skips allowed usages of library()/character.only=TRUE", { expect_no_lint("library(data.table)", linter) expect_no_lint("function(pkg) library(pkg, character.only = TRUE)", linter) + expect_no_lint("\\(pkg) library(pkg, character.only = TRUE)", linter) expect_no_lint("function(pkgs) sapply(pkgs, require, character.only = TRUE)", linter) + expect_no_lint("\\(pkgs) sapply(pkgs, require, character.only = TRUE)", linter) }) test_that("blocks disallowed usages of strings in library()/require()", { From 99d00a36548bcd27572e97261ed6d6ee5fe697bf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 18:29:58 +0000 Subject: [PATCH 08/28] simpler approach, avoid rex() due to bug --- .dev/maybe_fuzz_content.R | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index 60bb6588c..16c5e566c 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -21,28 +21,24 @@ fuzz_contents <- function(f) { return(invisible()) } - fun_tokens <- c("'\\\\'", "FUNCTION") - fun_idx <- which(pd$token %in% fun_tokens) + fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function") + fun_idx <- which(pd$token %in% names(fun_tokens)) n_fun <- length(fun_idx) if (n_fun == 0L) { return(invisible()) } - pd$new_token <- NA_character_ - pd$new_token[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE) + pd$new_text <- NA_character_ + pd$new_text[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE) l <- readLines(f) - replacement_map <- c(FUNCTION = "\\", `'\\\\'` = "function") for (ii in rev(fun_idx)) { - if (pd$token[ii] == pd$new_token[ii]) next - ptn = rex::rex( - start, - capture(n_times(anything, pd$col1[ii] - 1L), name = "prefix"), - pd$text[ii] - ) - l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, paste0("\\1", rex::rex(replacement_map[pd$token[ii]]))) + if (pd$text[ii] == pd$new_text[ii]) next + # Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96 + ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii])) + l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii]))) } writeLines(l, f) From d3cca7ad7a5af660f0544e7bc329586ff8db53f7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 19:35:29 +0000 Subject: [PATCH 09/28] also ignore warnings --- .dev/maybe_fuzz_content.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index 16c5e566c..b14a90aeb 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -15,8 +15,9 @@ maybe_fuzz_content <- function(file, lines) { } fuzz_contents <- function(f) { - pd <- tryCatch(getParseData(parse(f, keep.source = TRUE)), error = identity) - # e.g. Rmd files. We could use get_source_expressions(), but with little benefit & much slower. + # skip errors for e.g. Rmd files, and ignore warnings. + # We could use get_source_expressions(), but with little benefit & much slower. + pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity) if (inherits(pd, "error")) { return(invisible()) } From 59dc1b02ead3249c3f914f85dd78dfffcbcd8bb5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 19:59:09 +0000 Subject: [PATCH 10/28] finally getting somewhere... --- .dev/ast_fuzz_test.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 46be6a27c..993be3b6b 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -40,4 +40,20 @@ withr::defer({ pkgload::load_all() -testthat::test_dir("tests") +reporter <- testthat::SummaryReporter$new() +testthat::test_local(reporter = reporter) + +failures <- reporter$failures$as_list() +valid_failure <- vapply( + failures, + function(failure) { + if (grepl("column_number [0-9]+L? did not match", failure$message)) { + return(TRUE) + } + FALSE + }, + logical(1L) +) +for (failure in failures) { + +} From a25065f7244f3d889b4335dea4401a092577ea3c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 20:48:40 +0000 Subject: [PATCH 11/28] progressively more complicated :( --- .dev/ast_fuzz_test.R | 46 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 993be3b6b..802c19eeb 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -40,6 +40,46 @@ withr::defer({ pkgload::load_all() +test_restorations <- list() +for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) { + xml <- xml2::read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE))) + # parent::* to catch top-level comments (exprlist) + nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*") + if (length(nofuzz_lines) == 0L) next + + original <- test_lines <- readLines(test_file) + + for (nofuzz_line in nofuzz_lines) { + comments <- xml2::xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]") + comment_text <- xml2::xml_text(comments) + start_idx <- grep("nofuzz start", comment_text, fixed = TRUE) + end_idx <- grep("nofuzz end", comment_text, fixed = TRUE) + if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) { + stop(sprintf( + "Mismatched '# nofuzz start' (%s), '# nofuzz end' (%s) in %s", + toString(start_idx), toString(end_idx), test_file + )) + } + + comment_range <- Map(`:`, + as.integer(xml2::xml_attr(comments[start_idx], "line1")), + as.integer(xml2::xml_attr(comments[end_idx], "line1")) + ) + for (comment_range in comment_ranges) { + test_lines[comment_range] <- paste("#", test_lines[comment_range]) + } + + if (!any(!start_idx & !end_idx)) next + + comment_range <- as.integer(xml2::xml_attr(nofuzz_line, "line1")):as.integer(xml2::xml_attr(nofuzz_line, "line2")) + test_lines[comment_range] <- paste("#", test_lines[comment_range]) + } + + writeLines(test_lines, test_file) + test_restorations <- c(test_restorations, list(list(file = test_file, lines = original))) +} +withr::defer(for (restoration in test_restorations) writeLines(restoration$original, restoration$file)) + reporter <- testthat::SummaryReporter$new() testthat::test_local(reporter = reporter) @@ -50,10 +90,10 @@ valid_failure <- vapply( if (grepl("column_number [0-9]+L? did not match", failure$message)) { return(TRUE) } + if (grepl("ranges list[(].* did not match", failure$message)) { + return(TRUE) + } FALSE }, logical(1L) ) -for (failure in failures) { - -} From 491a3405ae88761410a5ee2b9c5444228617ec1d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 20:58:02 +0000 Subject: [PATCH 12/28] round of fixes & first working nofuzz --- .dev/ast_fuzz_test.R | 7 +++++-- tests/testthat/test-exclusions.R | 2 +- tests/testthat/test-get_source_expressions.R | 21 +++++++++++--------- tests/testthat/test-indentation_linter.R | 2 ++ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 802c19eeb..9cc411084 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -61,7 +61,7 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = )) } - comment_range <- Map(`:`, + comment_ranges <- Map(`:`, as.integer(xml2::xml_attr(comments[start_idx], "line1")), as.integer(xml2::xml_attr(comments[end_idx], "line1")) ) @@ -69,8 +69,11 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = test_lines[comment_range] <- paste("#", test_lines[comment_range]) } - if (!any(!start_idx & !end_idx)) next + if (length(start_idx) > 0L && !any(!start_idx & !end_idx)) next + # NB: one-line tests line expect_lint(...) # nofuzz are not supported, + # since the comment will attach to the parent test_that() & thus comment + # out the whole unit. Easiest solution is just to spread out those few tests for now. comment_range <- as.integer(xml2::xml_attr(nofuzz_line, "line1")):as.integer(xml2::xml_attr(nofuzz_line, "line2")) test_lines[comment_range] <- paste("#", test_lines[comment_range]) } diff --git a/tests/testthat/test-exclusions.R b/tests/testthat/test-exclusions.R index 9fce8adf5..9bfddc81e 100644 --- a/tests/testthat/test-exclusions.R +++ b/tests/testthat/test-exclusions.R @@ -142,7 +142,7 @@ test_that("#1442: is_excluded_files works if no global exclusions are specified" ) # 3 lints: assignment_linter(), quotes_linter() and line_length_linter() - expect_lint( + expect_lint( # nofuzz file = file.path(tmp, "bad.R"), checks = list( list(linter = "assignment_linter", line_number = 1L), diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R index 64dbd548d..d99016d78 100644 --- a/tests/testthat/test-get_source_expressions.R +++ b/tests/testthat/test-get_source_expressions.R @@ -108,7 +108,10 @@ test_that("Can read non UTF-8 file", { withr::local_options(list(lintr.linter_file = tempfile())) proj_dir <- test_path("dummy_projects", "project") withr::local_dir(proj_dir) - expect_no_lint(file = "cp1252.R", linters = list()) + expect_no_lint( # nofuzz + file = "cp1252.R", + linters = list() + ) }) test_that("Warns if encoding is misspecified, Pt. 1", { @@ -142,14 +145,14 @@ test_that("Warns if encoding is misspecified, Pt. 1", { test_that("Can extract line number from parser errors", { with_content_to_parse( - trim_some(' - "ok" - R"---a---" - '), - { - expect_identical(error$message, "Malformed raw string literal.") - expect_identical(error$line_number, 2L) - } +# # trim_some(' +# # "ok" +# # R"---a---" +# # '), +# # { +# # expect_identical(error$message, "Malformed raw string literal.") +# # expect_identical(error$line_number, 2L) +# # } ) with_content_to_parse( diff --git a/tests/testthat/test-indentation_linter.R b/tests/testthat/test-indentation_linter.R index 4dd640389..10550eaff 100644 --- a/tests/testthat/test-indentation_linter.R +++ b/tests/testthat/test-indentation_linter.R @@ -152,6 +152,7 @@ test_that("indentation linter flags improper closing curly braces", { ) }) +# nofuzz start test_that("function argument indentation works in tidyverse-style", { linter <- indentation_linter() expect_no_lint( @@ -260,6 +261,7 @@ test_that("function argument indentation works in tidyverse-style", { linter ) }) +# nofuzz end test_that("function argument indentation works in always-hanging-style", { linter <- indentation_linter(hanging_indent_style = "always") From 92f0628ff64cd109e87120392ffe7add7992d232 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 21:03:19 +0000 Subject: [PATCH 13/28] looks like we got another live one... break time --- tests/testthat/test-indentation_linter.R | 2 +- tests/testthat/test-object_usage_linter.R | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-indentation_linter.R b/tests/testthat/test-indentation_linter.R index 10550eaff..229f78cbe 100644 --- a/tests/testthat/test-indentation_linter.R +++ b/tests/testthat/test-indentation_linter.R @@ -261,7 +261,6 @@ test_that("function argument indentation works in tidyverse-style", { linter ) }) -# nofuzz end test_that("function argument indentation works in always-hanging-style", { linter <- indentation_linter(hanging_indent_style = "always") @@ -357,6 +356,7 @@ test_that("function argument indentation works in always-hanging-style", { linter ) }) +# nofuzz end test_that("indentation with operators works", { linter <- indentation_linter() diff --git a/tests/testthat/test-object_usage_linter.R b/tests/testthat/test-object_usage_linter.R index a12620008..3472fec3a 100644 --- a/tests/testthat/test-object_usage_linter.R +++ b/tests/testthat/test-object_usage_linter.R @@ -177,10 +177,10 @@ test_that("calls with top level function definitions are ignored", { test_that("object-usage line-numbers are relative to start-of-file", { expect_lint( trim_some(" - a <- function(y) { + a <- \\(y) { y ** 2 } - b <- function() { + b <- \\() { x } "), @@ -672,6 +672,8 @@ test_that("messages without a quoted name are caught", { # See #1914 test_that("symbols in formulas aren't treated as 'undefined global'", { + linter <- object_usage_linter() + expect_lint( trim_some(" foo <- function(x) { @@ -686,7 +688,7 @@ test_that("symbols in formulas aren't treated as 'undefined global'", { line_number = 4L, column_number = 21L ), - object_usage_linter() + linter ) # neither on the RHS @@ -704,7 +706,7 @@ test_that("symbols in formulas aren't treated as 'undefined global'", { line_number = 4L, column_number = 21L ), - object_usage_linter() + linter ) # nor in nested expressions @@ -722,7 +724,7 @@ test_that("symbols in formulas aren't treated as 'undefined global'", { line_number = 4L, column_number = 21L ), - object_usage_linter() + linter ) # nor as a call @@ -743,7 +745,7 @@ test_that("symbols in formulas aren't treated as 'undefined global'", { line_number = 4L, column_number = 21L ), - object_usage_linter() + linter ) }) From d387a715ee5ce35669d2f9c6beef0129b93e0024 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 21:40:23 +0000 Subject: [PATCH 14/28] another true positive --- R/unnecessary_lambda_linter.R | 2 +- tests/testthat/test-object_usage_linter.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/unnecessary_lambda_linter.R b/R/unnecessary_lambda_linter.R index 007b3c831..f2f62232d 100644 --- a/R/unnecessary_lambda_linter.R +++ b/R/unnecessary_lambda_linter.R @@ -93,7 +93,7 @@ unnecessary_lambda_linter <- function(allow_comparison = FALSE) { # NB: this includes 0+3 and TRUE+FALSE, which are also fine. inner_comparison_xpath <- glue(" parent::expr - /expr[FUNCTION] + /expr[FUNCTION or OP-LAMBDA] /expr[ ({ xp_or(infix_metadata$xml_tag[infix_metadata$comparator]) }) and expr[ diff --git a/tests/testthat/test-object_usage_linter.R b/tests/testthat/test-object_usage_linter.R index 3472fec3a..d2c58371e 100644 --- a/tests/testthat/test-object_usage_linter.R +++ b/tests/testthat/test-object_usage_linter.R @@ -177,10 +177,10 @@ test_that("calls with top level function definitions are ignored", { test_that("object-usage line-numbers are relative to start-of-file", { expect_lint( trim_some(" - a <- \\(y) { + a <- function(y) { y ** 2 } - b <- \\() { + b <- function() { x } "), From e150ffe73791da5d70b863c3c5ce5d87eb8c46c9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:01:14 +0000 Subject: [PATCH 15/28] more ignores, need '.' in file extension, restore test --- .dev/ast_fuzz_test.R | 11 ++++------- .dev/maybe_fuzz_content.R | 2 +- tests/testthat/test-get_source_expressions.R | 16 ++++++++-------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 9cc411084..84f263d7d 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -47,7 +47,7 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*") if (length(nofuzz_lines) == 0L) next - original <- test_lines <- readLines(test_file) + test_original <- test_lines <- readLines(test_file) for (nofuzz_line in nofuzz_lines) { comments <- xml2::xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]") @@ -79,9 +79,9 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = } writeLines(test_lines, test_file) - test_restorations <- c(test_restorations, list(list(file = test_file, lines = original))) + test_restorations <- c(test_restorations, list(list(file = test_file, lines = test_original))) } -withr::defer(for (restoration in test_restorations) writeLines(restoration$original, restoration$file)) +withr::defer(for (restoration in test_restorations) writeLines(restoration$lines, restoration$file)) reporter <- testthat::SummaryReporter$new() testthat::test_local(reporter = reporter) @@ -90,10 +90,7 @@ failures <- reporter$failures$as_list() valid_failure <- vapply( failures, function(failure) { - if (grepl("column_number [0-9]+L? did not match", failure$message)) { - return(TRUE) - } - if (grepl("ranges list[(].* did not match", failure$message)) { + if (grepl('(column_number|ranges|line) .* did not match', failure$message)) { return(TRUE) } FALSE diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index b14a90aeb..b805b865d 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -5,7 +5,7 @@ maybe_fuzz_content <- function(file, lines) { writeLines(lines, con = con, sep = "\n") close(con) } else { - new_file <- tempfile(fileext = tools::file_ext(file)) + new_file <- tempfile(fileext = paste0(".", tools::file_ext(file))) file.copy(file, new_file, copy.mode = FALSE) } diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R index d99016d78..bbda9d362 100644 --- a/tests/testthat/test-get_source_expressions.R +++ b/tests/testthat/test-get_source_expressions.R @@ -145,14 +145,14 @@ test_that("Warns if encoding is misspecified, Pt. 1", { test_that("Can extract line number from parser errors", { with_content_to_parse( -# # trim_some(' -# # "ok" -# # R"---a---" -# # '), -# # { -# # expect_identical(error$message, "Malformed raw string literal.") -# # expect_identical(error$line_number, 2L) -# # } + trim_some(' + "ok" + R"---a---" + '), + { + expect_identical(error$message, "Malformed raw string literal.") + expect_identical(error$line_number, 2L) + } ) with_content_to_parse( From 3d1fc0ea7f8b32ac7cecd2ad3a3e8343dd09a1ca Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:07:41 +0000 Subject: [PATCH 16/28] wrapping up --- .dev/ast_fuzz_test.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 84f263d7d..6ae518e31 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -97,3 +97,10 @@ valid_failure <- vapply( }, logical(1L) ) +if (!all(valid_failure)) { + failures <- failures[!valid_failure] + names(failures) <- vapply(failures, `[[`, "test", FUN.VALUE = character(1L)) + cat("Some fuzzed tests failed unexpectedly!\n") + print(failures) + stop("Use # nofuzz [start|end] to mark false positives or fix any bugs.") +} From b69b7cd0a8d8632a022c96a135c8212c04b80b63 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:11:53 +0000 Subject: [PATCH 17/28] Write up the GHA config --- .dev/ast_fuzz_test.R | 14 ++++++++------ .github/workflows/ast-fuzz.yaml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/ast-fuzz.yaml diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index 6ae518e31..d8a14ade7 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -10,6 +10,8 @@ # by randomly altering the contents of files encountered # under expect_lint() to swap known equivalencies. +library(xml2) + expect_lint_file <- "R/expect_lint.R" original <- readLines(expect_lint_file) @@ -42,7 +44,7 @@ pkgload::load_all() test_restorations <- list() for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) { - xml <- xml2::read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE))) + xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE))) # parent::* to catch top-level comments (exprlist) nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*") if (length(nofuzz_lines) == 0L) next @@ -50,8 +52,8 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = test_original <- test_lines <- readLines(test_file) for (nofuzz_line in nofuzz_lines) { - comments <- xml2::xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]") - comment_text <- xml2::xml_text(comments) + comments <- xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]") + comment_text <- xml_text(comments) start_idx <- grep("nofuzz start", comment_text, fixed = TRUE) end_idx <- grep("nofuzz end", comment_text, fixed = TRUE) if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) { @@ -62,8 +64,8 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = } comment_ranges <- Map(`:`, - as.integer(xml2::xml_attr(comments[start_idx], "line1")), - as.integer(xml2::xml_attr(comments[end_idx], "line1")) + as.integer(xml_attr(comments[start_idx], "line1")), + as.integer(xml_attr(comments[end_idx], "line1")) ) for (comment_range in comment_ranges) { test_lines[comment_range] <- paste("#", test_lines[comment_range]) @@ -74,7 +76,7 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = # NB: one-line tests line expect_lint(...) # nofuzz are not supported, # since the comment will attach to the parent test_that() & thus comment # out the whole unit. Easiest solution is just to spread out those few tests for now. - comment_range <- as.integer(xml2::xml_attr(nofuzz_line, "line1")):as.integer(xml2::xml_attr(nofuzz_line, "line2")) + comment_range <- as.integer(xml_attr(nofuzz_line, "line1")):as.integer(xml_attr(nofuzz_line, "line2")) test_lines[comment_range] <- paste("#", test_lines[comment_range]) } diff --git a/.github/workflows/ast-fuzz.yaml b/.github/workflows/ast-fuzz.yaml new file mode 100644 index 000000000..6242360cf --- /dev/null +++ b/.github/workflows/ast-fuzz.yaml @@ -0,0 +1,30 @@ +# Randomly change some code & ensure lint equivalency is maintained +on: + push: + branches: [main] + # TODO before merging: remove this. Only kept to demonstrate during review. + pull_request: + branches: [main] + +name: ast-fuzz + +jobs: + repo-meta-tests: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + + - name: Ensure equivalent code generates equivalent lints + run: | + callr::rscript(".dev/ast_fuzz_test.R") + shell: Rscript {0} From b8a06e39b3a9d6cbc8924fccc9143b2d945cdfe5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:23:16 +0000 Subject: [PATCH 18/28] annotation --- .dev/ast_fuzz_test.R | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index d8a14ade7..e67dc0ecc 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -42,10 +42,23 @@ withr::defer({ pkgload::load_all() +# beware lazy eval: originally tried adding a withr::defer() in each iteration, but +# this effectively only runs the last 'defer' expression as the names are only +# evaluated at run-time. So instead keep track of all edits in this object. +# this approach to implementing 'nofuzz' feels painfully manual, but I couldn't +# figure out how else to get 'testthat' to give us what we need -- the failures +# object in the reporter is frustratingly inconsistent in whether the trace +# exists, and even if it does, we'd have to text-mangle to get the corresponding +# file names out. Also, the trace 'srcref' happens under keep.source=FALSE, +# so we lose any associated comments anyway. even that would not solve the issue +# of getting top-level exclusions done for 'nofuzz start|end' ranges, except +# maybe if it enabled us to reuse lintr's own exclude() system. +# therefore we take this approach: pass over the test suite first and comment out +# any tests/units that have been marked 'nofuzz'. restore later. test_restorations <- list() for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) { xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE))) - # parent::* to catch top-level comments (exprlist) + # parent::* to catch top-level comments (exprlist). matches one-line nofuzz and start/end ranges. nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*") if (length(nofuzz_lines) == 0L) next @@ -54,6 +67,7 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = for (nofuzz_line in nofuzz_lines) { comments <- xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]") comment_text <- xml_text(comments) + # handle start/end ranges first. start_idx <- grep("nofuzz start", comment_text, fixed = TRUE) end_idx <- grep("nofuzz end", comment_text, fixed = TRUE) if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) { @@ -85,10 +99,18 @@ for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = } withr::defer(for (restoration in test_restorations) writeLines(restoration$lines, restoration$file)) +# for some reason, 'report <- test_dir(...)' did not work -- the resulting object is ~empty. +# even 'report <- test_local(...)', which does return an object, lacks any information about +# which tests failed (all reports are about successful or skipped tests). probably this is not +# the best approach but documentation was not very helpful. reporter <- testthat::SummaryReporter$new() testthat::test_local(reporter = reporter) failures <- reporter$failures$as_list() +# ignore any test that failed for expected reasons, e.g. some known lint metadata changes +# about line numbers or the contents of the line. this saves us having to pepper tons of +# 'nofuzz' comments throughout the suite, as well as getting around the difficulty of injecting +# 'expect_lint()' with new code to ignore these attributes (this latter we might explore later). valid_failure <- vapply( failures, function(failure) { From a3dbf278766af57ac7c007f98000a4fe45fd0ae8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:26:05 +0000 Subject: [PATCH 19/28] comment for future work --- .dev/maybe_fuzz_content.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index b805b865d..6a7849c39 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -14,6 +14,8 @@ maybe_fuzz_content <- function(file, lines) { new_file } +# we could also consider just passing any test where no fuzzing takes place, +# i.e. letting the other GHA handle whether unfuzzed tests pass as expected. fuzz_contents <- function(f) { # skip errors for e.g. Rmd files, and ignore warnings. # We could use get_source_expressions(), but with little benefit & much slower. From 5a22050b2ac38cb6346206f0860e4a6b2b88fcdf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 22:30:33 +0000 Subject: [PATCH 20/28] vestigial --- R/expect_lint.R | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/R/expect_lint.R b/R/expect_lint.R index 811dda0ea..6e1c9e630 100644 --- a/R/expect_lint.R +++ b/R/expect_lint.R @@ -166,47 +166,3 @@ require_testthat <- function() { ) } } -maybe_fuzz_content <- function(file, lines) { - new_file <- tempfile() - if (is.null(file)) { - con <- file(new_file, encoding = "UTF-8") - writeLines(lines, con = con, sep = "\n") - close(con) - } else { - file.copy(file, new_file, copy.mode = FALSE) - } - - fuzz_contents(new_file) - - new_file -} - -fuzz_contents <- function(f) { - pd <- getParseData(parse(f, keep.source = TRUE)) - - fun_tokens <- c("'\\\\'", "FUNCTION") - fun_idx <- which(pd$token %in% fun_tokens) - n_fun <- length(fun_idx) - - if (n_fun == 0L) { - return(invisible()) - } - - pd$new_token[fun_idx] <- sample(fun_tokens, length(fun_idx), replace = TRUE) - - l <- readLines(f) - - for (ii in rev(fun_idx)) { - if (pd$token[ii] == pd$new_token[ii]) next - browser() - ptn = paste0("^(.{", pd$col1 - 1L, "})") - } - - replacement_map <- c(FUNCTION = " \\", `'\\\\'` = "function") - start <- pd$col1[fun_idx] - substr(l[pd$line1[fun_idx]], start, start + nchar("function") - 1L) <- replacement_map[pd$token[fun_idx]] - - writeLines(l, f) - - invisible() -} From 76b869f8a5cbefbfbaebb7f4c8e20cc9e3ccd09d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:07:04 +0000 Subject: [PATCH 21/28] skips on old R --- tests/testthat/test-library_call_linter.R | 4 +++- tests/testthat/test-terminal_close_linter.R | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-library_call_linter.R b/tests/testthat/test-library_call_linter.R index f43790b36..aca3427c0 100644 --- a/tests/testthat/test-library_call_linter.R +++ b/tests/testthat/test-library_call_linter.R @@ -220,8 +220,10 @@ test_that("skips allowed usages of library()/character.only=TRUE", { expect_no_lint("library(data.table)", linter) expect_no_lint("function(pkg) library(pkg, character.only = TRUE)", linter) - expect_no_lint("\\(pkg) library(pkg, character.only = TRUE)", linter) expect_no_lint("function(pkgs) sapply(pkgs, require, character.only = TRUE)", linter) + + skip_if_not_r_version("4.1.0") + expect_no_lint("\\(pkg) library(pkg, character.only = TRUE)", linter) expect_no_lint("\\(pkgs) sapply(pkgs, require, character.only = TRUE)", linter) }) diff --git a/tests/testthat/test-terminal_close_linter.R b/tests/testthat/test-terminal_close_linter.R index b20149005..2a3d23d1c 100644 --- a/tests/testthat/test-terminal_close_linter.R +++ b/tests/testthat/test-terminal_close_linter.R @@ -82,6 +82,8 @@ test_that("terminal_close_linter blocks simple cases", { }) test_that("lints vectorize", { + skip_if_not_r_version("4.1.0") + expect_lint( trim_some("{ foo <- function() { From afec7431fb0d922da21489c71bba10fbcfa09ecf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:10:11 +0000 Subject: [PATCH 22/28] expect_no_lint --- .../testthat/test-unnecessary_lambda_linter.R | 96 +++++++++---------- 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/tests/testthat/test-unnecessary_lambda_linter.R b/tests/testthat/test-unnecessary_lambda_linter.R index 9a9839bb9..d223fad89 100644 --- a/tests/testthat/test-unnecessary_lambda_linter.R +++ b/tests/testthat/test-unnecessary_lambda_linter.R @@ -1,80 +1,77 @@ test_that("unnecessary_lambda_linter skips allowed usages", { linter <- unnecessary_lambda_linter() - expect_lint("lapply(DF, sum)", NULL, linter) - expect_lint("apply(M, 1, sum, na.rm = TRUE)", NULL, linter) + expect_no_lint("lapply(DF, sum)", linter) + expect_no_lint("apply(M, 1, sum, na.rm = TRUE)", linter) # the first argument may be ... or have a cumbersome name, so an anonymous # function may be preferable (e.g. this is often the case for grep() calls) - expect_lint("sapply(x, function(xi) foo(1, xi))", NULL, linter) - expect_lint("sapply(x, function(xi) return(foo(1, xi)))", NULL, linter) + expect_no_lint("sapply(x, function(xi) foo(1, xi))", linter) + expect_no_lint("sapply(x, function(xi) return(foo(1, xi)))", linter) # if the argument is re-used, that's also a no-go - expect_lint("dendrapply(x, function(xi) foo(xi, xi))", NULL, linter) + expect_no_lint("dendrapply(x, function(xi) foo(xi, xi))", linter) # at any nesting level - expect_lint("parLapply(cl, x, function(xi) foo(xi, 2, bar(baz(xi))))", NULL, linter) + expect_no_lint("parLapply(cl, x, function(xi) foo(xi, 2, bar(baz(xi))))", linter) # multi-expression case - expect_lint("lapply(x, function(xi) { print(xi); xi^2 })", NULL, linter) + expect_no_lint("lapply(x, function(xi) { print(xi); xi^2 })", linter) # multi-expression, multi-line cases - expect_lint( + expect_no_lint( trim_some(" lapply(x, function(xi) { print(xi); xi^2 }) "), - NULL, linter ) - expect_lint( + expect_no_lint( trim_some(" lapply(x, function(xi) { print(xi) xi^2 }) "), - NULL, linter ) # This _could_ be lapply(x, `%in%`, tbl), but don't force infix into lambda - expect_lint("lapply(x, function(xi) xi %in% tbl)", NULL, linter) + expect_no_lint("lapply(x, function(xi) xi %in% tbl)", linter) # This one could not - expect_lint("lapply(x, function(xi) tbl %in% xi)", NULL, linter) + expect_no_lint("lapply(x, function(xi) tbl %in% xi)", linter) # would require multiple lapply() loops - expect_lint("lapply(x, function(xi) foo(bar(xi)))", NULL, linter) - expect_lint("lapply(x, function(xi) return(foo(bar(xi))))", NULL, linter) + expect_no_lint("lapply(x, function(xi) foo(bar(xi)))", linter) + expect_no_lint("lapply(x, function(xi) return(foo(bar(xi))))", linter) # extractions, #2231 - expect_lint("lapply(l, function(x) rle(x)$values)", NULL, linter) - expect_lint('lapply(l, function(x) rle(x)["values"])', NULL, linter) - expect_lint('lapply(l, function(x) rle(x)[["values"]])', NULL, linter) - expect_lint("lapply(l, function(x) rle(x)@values)", NULL, linter) + expect_no_lint("lapply(l, function(x) rle(x)$values)", linter) + expect_no_lint('lapply(l, function(x) rle(x)["values"])', linter) + expect_no_lint('lapply(l, function(x) rle(x)[["values"]])', linter) + expect_no_lint("lapply(l, function(x) rle(x)@values)", linter) # return() extractions, #2258 - expect_lint("lapply(l, function(x) return(foo(x)$bar))", NULL, linter) - expect_lint('lapply(l, function(x) return(rle(x)["values"]))', NULL, linter) - expect_lint('lapply(l, function(x) return(rle(x)[["values"]]))', NULL, linter) - expect_lint("lapply(l, function(x) return(rle(x)@values))", NULL, linter) + expect_no_lint("lapply(l, function(x) return(foo(x)$bar))", linter) + expect_no_lint('lapply(l, function(x) return(rle(x)["values"]))', linter) + expect_no_lint('lapply(l, function(x) return(rle(x)[["values"]]))', linter) + expect_no_lint("lapply(l, function(x) return(rle(x)@values))", linter) # Other operators, #2247 - expect_lint("lapply(l, function(x) foo(x) - 1)", NULL, linter) - expect_lint("lapply(l, function(x) foo(x) * 2)", NULL, linter) - expect_lint("lapply(l, function(x) foo(x) ^ 3)", NULL, linter) - expect_lint("lapply(l, function(x) foo(x) %% 4)", NULL, linter) + expect_no_lint("lapply(l, function(x) foo(x) - 1)", linter) + expect_no_lint("lapply(l, function(x) foo(x) * 2)", linter) + expect_no_lint("lapply(l, function(x) foo(x) ^ 3)", linter) + expect_no_lint("lapply(l, function(x) foo(x) %% 4)", linter) # Don't include other lambdas, #2249 - expect_lint( + expect_no_lint( trim_some('{ lapply(x, function(e) sprintf("%o", e)) lapply(y, function(e) paste(strlpad(e, "0", width))) }'), - NULL, linter ) # only call is on RHS of operator, #2310 - expect_lint("lapply(l, function(x) 'a' %in% names(x))", NULL, linter) - expect_lint("lapply(l, function(x = 1) 'a' %in% names(x))", NULL, linter) + expect_no_lint("lapply(l, function(x) 'a' %in% names(x))", linter) + expect_no_lint("lapply(l, function(x = 1) 'a' %in% names(x))", linter) }) test_that("unnecessary_lambda_linter skips allowed inner comparisons", { @@ -82,13 +79,13 @@ test_that("unnecessary_lambda_linter skips allowed inner comparisons", { # lapply returns a list, so not the same, though as.list is probably # a better choice - expect_lint("lapply(x, function(xi) foo(xi) == 2)", NULL, linter) + expect_no_lint("lapply(x, function(xi) foo(xi) == 2)", linter) # this _may_ return a matrix, though outer is probably a better choice if so - expect_lint("sapply(x, function(xi) foo(xi) == y)", NULL, linter) + expect_no_lint("sapply(x, function(xi) foo(xi) == y)", linter) # only lint "plain" calls that can be replaced by eliminating the lambda - expect_lint("sapply(x, function(xi) sum(abs(xi)) == 0)", NULL, linter) + expect_no_lint("sapply(x, function(xi) sum(abs(xi)) == 0)", linter) }) test_that("unnecessary_lambda_linter blocks simple disallowed usage", { @@ -132,9 +129,9 @@ test_that("unnecessary_lambda_linter blocks simple disallowed usages", { expect_lint("sapply(x, function(xi) foo(xi) == 'a')", lint_msg, linter) expect_lint("sapply(x, function(xi) foo(xi) == 1 + 2i)", lint_msg, linter) - expect_lint("sapply(x, function(xi) foo(xi) == 2)", NULL, linter_allow) - expect_lint("sapply(x, function(xi) foo(xi) == 'a')", NULL, linter_allow) - expect_lint("sapply(x, function(xi) foo(xi) == 1 + 2i)", NULL, linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) == 2)", linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) == 'a')", linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) == 1 + 2i)", linter_allow) # vapply counts as well # NB: we ignore the FUN.VALUE argument, for now @@ -154,22 +151,24 @@ test_that("unnecessary_lambda_linter blocks other comparators as well", { expect_lint("sapply(x, function(xi) foo(xi) != 'a')", lint_msg, linter) expect_lint("sapply(x, function(xi) foo(xi) < 1 + 2i)", lint_msg, linter) - expect_lint("sapply(x, function(xi) foo(xi) >= 2)", NULL, linter_allow) - expect_lint("sapply(x, function(xi) foo(xi) != 'a')", NULL, linter_allow) - expect_lint("sapply(x, function(xi) foo(xi) < 1 + 2i)", NULL, linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) >= 2)", linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) != 'a')", linter_allow) + expect_no_lint("sapply(x, function(xi) foo(xi) < 1 + 2i)", linter_allow) }) test_that("unnecessary_lambda_linter doesn't apply to keyword args", { - expect_lint("lapply(x, function(xi) data.frame(nm = xi))", NULL, unnecessary_lambda_linter()) - expect_lint("lapply(x, function(xi) return(data.frame(nm = xi)))", NULL, unnecessary_lambda_linter()) + linter <- unnecessary_lambda_linter() + + expect_no_lint("lapply(x, function(xi) data.frame(nm = xi))", linter) + expect_no_lint("lapply(x, function(xi) return(data.frame(nm = xi)))", linter) }) test_that("purrr-style anonymous functions are also caught", { linter <- unnecessary_lambda_linter() - expect_lint("purrr::map(x, ~.x)", NULL, linter) - expect_lint("purrr::map_df(x, ~lm(y, .x))", NULL, linter) - expect_lint("map_dbl(x, ~foo(bar = .x))", NULL, linter) + expect_no_lint("purrr::map(x, ~.x)", linter) + expect_no_lint("purrr::map_df(x, ~lm(y, .x))", linter) + expect_no_lint("map_dbl(x, ~foo(bar = .x))", linter) expect_lint( "purrr::map(x, ~foo(.x))", @@ -234,20 +233,19 @@ test_that("cases with braces are caught", { linter ) - expect_lint( + expect_no_lint( trim_some(" lapply(x, function(xi) { print(xi) xi }) "), - NULL, linter ) # false positives like #2231, #2247 are avoided with braces too - expect_lint("lapply(x, function(xi) { foo(xi)$bar })", NULL, linter) - expect_lint("lapply(x, function(xi) { foo(xi) - 1 })", NULL, linter) + expect_no_lint("lapply(x, function(xi) { foo(xi)$bar })", linter) + expect_no_lint("lapply(x, function(xi) { foo(xi) - 1 })", linter) }) test_that("function shorthand is handled", { From 51593e408237a1c9f74368bdd215c0e0cacdd4a9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:10:58 +0000 Subject: [PATCH 23/28] new tests --- tests/testthat/test-unnecessary_lambda_linter.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-unnecessary_lambda_linter.R b/tests/testthat/test-unnecessary_lambda_linter.R index d223fad89..00f70d284 100644 --- a/tests/testthat/test-unnecessary_lambda_linter.R +++ b/tests/testthat/test-unnecessary_lambda_linter.R @@ -256,6 +256,14 @@ test_that("function shorthand is handled", { rex::rex("Pass sum directly as a symbol to lapply()"), unnecessary_lambda_linter() ) + + expect_lint("sapply(x, \\(xi) foo(xi) == 2)", lint_msg, linter) + expect_lint("sapply(x, \\(xi) foo(xi) == 'a')", lint_msg, linter) + expect_lint("sapply(x, \\(xi) foo(xi) == 1 + 2i)", lint_msg, linter) + + expect_no_lint("sapply(x, \\(xi) foo(xi) == 2)", linter_allow) + expect_no_lint("sapply(x, \\(xi) foo(xi) == 'a')", linter_allow) + expect_no_lint("sapply(x, \\(xi) foo(xi) == 1 + 2i)", linter_allow) }) test_that("lints vectorize", { From f4b9481f3ac160f67dfbc4849ddb8d79f69d0a47 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:13:14 +0000 Subject: [PATCH 24/28] NEWS --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 661038ad7..8ea061062 100644 --- a/NEWS.md +++ b/NEWS.md @@ -53,6 +53,10 @@ ### Lint accuracy fixes: removing false negatives * `todo_comment_linter()` finds comments inside {roxygen2} markup comments (#2447, @MichaelChirico). +* Linters with logic around function declarations consistently include the R 4.0.0 shorthand `\()` (#2818, continuation of earlier #2190, @MichaelChirico). + + `library_call_linter()` + + `terminal_close_linter()` + + `unnecessary_lambda_linter()` ## Notes From 6389d5539ce55be4e3292b8c051f28b314568c5b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 15:20:39 -0800 Subject: [PATCH 25/28] bad copy-paste --- tests/testthat/test-unnecessary_lambda_linter.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-unnecessary_lambda_linter.R b/tests/testthat/test-unnecessary_lambda_linter.R index 00f70d284..608421429 100644 --- a/tests/testthat/test-unnecessary_lambda_linter.R +++ b/tests/testthat/test-unnecessary_lambda_linter.R @@ -250,13 +250,16 @@ test_that("cases with braces are caught", { test_that("function shorthand is handled", { skip_if_not_r_version("4.1.0") - + linter <- unnecessary_lambda_linter() + linter_allow <- unnecessary_lambda_linter(allow_comparison = TRUE) + expect_lint( "lapply(DF, \\(x) sum(x))", rex::rex("Pass sum directly as a symbol to lapply()"), - unnecessary_lambda_linter() + linter ) + lint_msg <- rex::rex("Compare to a constant after calling sapply() to get", anything, "sapply(x, foo)") expect_lint("sapply(x, \\(xi) foo(xi) == 2)", lint_msg, linter) expect_lint("sapply(x, \\(xi) foo(xi) == 'a')", lint_msg, linter) expect_lint("sapply(x, \\(xi) foo(xi) == 1 + 2i)", lint_msg, linter) From 1550eadca280f3c25241d73a7ee6accddb2a817f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:36:51 +0000 Subject: [PATCH 26/28] need stop_on_failure for batch? --- .dev/ast_fuzz_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R index e67dc0ecc..501a5b2cc 100644 --- a/.dev/ast_fuzz_test.R +++ b/.dev/ast_fuzz_test.R @@ -104,7 +104,7 @@ withr::defer(for (restoration in test_restorations) writeLines(restoration$lines # which tests failed (all reports are about successful or skipped tests). probably this is not # the best approach but documentation was not very helpful. reporter <- testthat::SummaryReporter$new() -testthat::test_local(reporter = reporter) +testthat::test_local(reporter = reporter, stop_on_failure = FALSE) failures <- reporter$failures$as_list() # ignore any test that failed for expected reasons, e.g. some known lint metadata changes From bbdac439e2818c5c32d151f27406f426de088abc Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 6 Mar 2025 23:50:21 +0000 Subject: [PATCH 27/28] delint, fix last skip for R<4.1.0 --- tests/testthat/test-terminal_close_linter.R | 9 +++++---- tests/testthat/test-unnecessary_lambda_linter.R | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-terminal_close_linter.R b/tests/testthat/test-terminal_close_linter.R index 2a3d23d1c..2423745c5 100644 --- a/tests/testthat/test-terminal_close_linter.R +++ b/tests/testthat/test-terminal_close_linter.R @@ -20,17 +20,18 @@ test_that("terminal_close_linter skips allowed cases", { expect_no_lint(lines, linter) lines <- trim_some(" - foo <- \\(bar) { + foo <- function(bar) { close <- bar + 1 - return(close) + close } ") expect_no_lint(lines, linter) + skip_if_not_r_version("4.1.0") lines <- trim_some(" - foo <- function(bar) { + foo <- \\(bar) { close <- bar + 1 - close + return(close) } ") expect_no_lint(lines, linter) diff --git a/tests/testthat/test-unnecessary_lambda_linter.R b/tests/testthat/test-unnecessary_lambda_linter.R index 608421429..44655b44b 100644 --- a/tests/testthat/test-unnecessary_lambda_linter.R +++ b/tests/testthat/test-unnecessary_lambda_linter.R @@ -252,7 +252,7 @@ test_that("function shorthand is handled", { skip_if_not_r_version("4.1.0") linter <- unnecessary_lambda_linter() linter_allow <- unnecessary_lambda_linter(allow_comparison = TRUE) - + expect_lint( "lapply(DF, \\(x) sum(x))", rex::rex("Pass sum directly as a symbol to lapply()"), From 523c21867fdd8afb6706bb00938943bb41e840f3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 7 Mar 2025 00:14:49 +0000 Subject: [PATCH 28/28] more extensible structure --- .dev/maybe_fuzz_content.R | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R index 6a7849c39..3db32d7c1 100644 --- a/.dev/maybe_fuzz_content.R +++ b/.dev/maybe_fuzz_content.R @@ -9,21 +9,12 @@ maybe_fuzz_content <- function(file, lines) { file.copy(file, new_file, copy.mode = FALSE) } - fuzz_contents(new_file) + apply_fuzzers(new_file) new_file } -# we could also consider just passing any test where no fuzzing takes place, -# i.e. letting the other GHA handle whether unfuzzed tests pass as expected. -fuzz_contents <- function(f) { - # skip errors for e.g. Rmd files, and ignore warnings. - # We could use get_source_expressions(), but with little benefit & much slower. - pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity) - if (inherits(pd, "error")) { - return(invisible()) - } - +function_lambda_fuzzer <- function(pd, lines) { fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function") fun_idx <- which(pd$token %in% names(fun_tokens)) n_fun <- length(fun_idx) @@ -35,16 +26,37 @@ fuzz_contents <- function(f) { pd$new_text <- NA_character_ pd$new_text[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE) - l <- readLines(f) - for (ii in rev(fun_idx)) { if (pd$text[ii] == pd$new_text[ii]) next # Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96 ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii])) - l[pd$line1[ii]] <- rex::re_substitutes(l[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii]))) + lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii]))) } + lines +} - writeLines(l, f) +# we could also consider just passing any test where no fuzzing takes place, +# i.e. letting the other GHA handle whether unfuzzed tests pass as expected. +apply_fuzzers <- function(f) { + # skip errors for e.g. Rmd files, and ignore warnings. + # We could use get_source_expressions(), but with little benefit & much slower. + pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity) + if (inherits(pd, "error")) { + return(invisible()) + } + + reparse <- FALSE + lines <- readLines(f) + for (fuzzer in list(function_lambda_fuzzer)) { + if (reparse) { + pd <- getParseData(parse(f, keep.source = TRUE)) + lines <- readLines(f) + } + updated_lines <- fuzzer(pd, lines) + reparse <- !is.null(updated_lines) + if (!reparse) next # skip some I/O if we can + writeLines(updated_lines, f) + } invisible() }