From 56740088059f88f13293bbd94c756be928a193c8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 13 Nov 2023 22:29:02 -0800 Subject: [PATCH] New sample_int_linter (#2274) * New sample_int_linter * add metadata regression test --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/sample_int_linter.R | 60 ++++++++++++++++ inst/lintr/linters.csv | 1 + man/efficiency_linters.Rd | 1 + man/linters.Rd | 7 +- man/readability_linters.Rd | 1 + man/robustness_linters.Rd | 1 + man/sample_int_linter.Rd | 18 +++++ tests/testthat/test-sample_int_linter.R | 95 +++++++++++++++++++++++++ 11 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 R/sample_int_linter.R create mode 100644 man/sample_int_linter.Rd create mode 100644 tests/testthat/test-sample_int_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index 1f7264286..31871bd90 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -156,6 +156,7 @@ Collate: 'regex_subset_linter.R' 'repeat_linter.R' 'routine_registration_linter.R' + 'sample_int_linter.R' 'scalar_in_linter.R' 'semicolon_linter.R' 'seq_linter.R' diff --git a/NAMESPACE b/NAMESPACE index 9da6d5205..a6c17992e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,7 @@ export(redundant_ifelse_linter) export(regex_subset_linter) export(repeat_linter) export(routine_registration_linter) +export(sample_int_linter) export(sarif_output) export(scalar_in_linter) export(semicolon_linter) diff --git a/NEWS.md b/NEWS.md index 4f53beb0e..fb391d3fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ ### New linters +* `sample_int_linter()` for encouraging `sample.int(n, ...)` over equivalents like `sample(1:n, ...)` (part of #884, @MichaelChirico). * `stopifnot_all_linter()` discourages tests with `all()` like `stopifnot(all(x > 0))`; `stopifnot()` runs `all()` itself, and uses a better error message (part of #884, @MichaelChirico). * `comparison_negation_linter()` for discouraging negated comparisons when a direct negation is preferable, e.g. `!(x == y)` could be `x != y` (part of #884, @MichaelChirico). * `terminal_close_linter()` for discouraging using `close()` to end functions (part of #884, @MichaelChirico). Such usages are not robust to errors, where `close()` will not be run as intended. Put `close()` in an `on.exit()` hook, or use {withr} to manage connections with proper cleanup. diff --git a/R/sample_int_linter.R b/R/sample_int_linter.R new file mode 100644 index 000000000..a9da00f6f --- /dev/null +++ b/R/sample_int_linter.R @@ -0,0 +1,60 @@ +#' Require usage of sample.int(n, m, ...) over sample(1:n, m, ...) +#' +#' [sample.int()] is preferable to `sample()` for the case of sampling numbers +#' between 1 and `n`. `sample` calls `sample.int()` "under the hood". +#' +#' @evalRd rd_tags("sample_int_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +sample_int_linter <- function() { + # looking for anything like sample(1: that doesn't come after a $ extraction + # exclude TRUE/FALSE for sample(replace = TRUE, ...) usage. better + # would be match.arg() but this also works. + xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'sample'] + /parent::expr[not(OP-DOLLAR or OP-AT)] + /following-sibling::expr[1][ + ( + expr[1]/NUM_CONST[text() = '1' or text() = '1L'] + and OP-COLON + ) + or expr/SYMBOL_FUNCTION_CALL[text() = 'seq_len'] + or ( + expr/SYMBOL_FUNCTION_CALL[text() = 'seq'] + and ( + count(expr) = 2 + or ( + expr[2]/NUM_CONST[text() = '1' or text() = '1L'] + and not(SYMBOL_SUB[ + text() = 'by' + and not(following-sibling::expr[1]/NUM_CONST[text() = '1' or text() = '1L']) + ]) + ) + ) + ) + or NUM_CONST[not(text() = 'TRUE' or text() = 'FALSE')] + ] + /parent::expr + ") + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + first_call <- xp_call_name(bad_expr, depth = 2L) + original <- sprintf("%s(n)", first_call) + original[!is.na(xml_find_first(bad_expr, "expr[2]/OP-COLON"))] <- "1:n" + original[!is.na(xml_find_first(bad_expr, "expr[2]/NUM_CONST"))] <- "n" + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = glue("sample.int(n, m, ...) is preferable to sample({original}, m, ...)."), + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 275c1863d..c6ca4ca94 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -76,6 +76,7 @@ redundant_ifelse_linter,best_practices efficiency consistency configurable regex_subset_linter,best_practices efficiency repeat_linter,style readability routine_registration_linter,best_practices efficiency robustness +sample_int_linter,efficiency readability robustness scalar_in_linter,readability consistency best_practices efficiency semicolon_linter,style readability default configurable semicolon_terminator_linter,style readability deprecated configurable diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 7122f90e8..146c0be54 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -28,6 +28,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{redundant_ifelse_linter}}} \item{\code{\link{regex_subset_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{scalar_in_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{sort_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index c960b5d00..ef71af792 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -24,12 +24,12 @@ The following tags exist: \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (8 linters)} -\item{\link[=efficiency_linters]{efficiency} (25 linters)} +\item{\link[=efficiency_linters]{efficiency} (26 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (56 linters)} -\item{\link[=robustness_linters]{robustness} (15 linters)} +\item{\link[=readability_linters]{readability} (57 linters)} +\item{\link[=robustness_linters]{robustness} (16 linters)} \item{\link[=style_linters]{style} (38 linters)} } } @@ -108,6 +108,7 @@ The following linters exist: \item{\code{\link{regex_subset_linter}} (tags: best_practices, efficiency)} \item{\code{\link{repeat_linter}} (tags: readability, style)} \item{\code{\link{routine_registration_linter}} (tags: best_practices, efficiency, robustness)} +\item{\code{\link{sample_int_linter}} (tags: efficiency, readability, robustness)} \item{\code{\link{scalar_in_linter}} (tags: best_practices, consistency, efficiency, readability)} \item{\code{\link{semicolon_linter}} (tags: configurable, default, readability, style)} \item{\code{\link{seq_linter}} (tags: best_practices, consistency, default, efficiency, robustness)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index 7bb1a809f..06deb9233 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -53,6 +53,7 @@ The following linters are tagged with 'readability': \item{\code{\link{quotes_linter}}} \item{\code{\link{redundant_equals_linter}}} \item{\code{\link{repeat_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{scalar_in_linter}}} \item{\code{\link{semicolon_linter}}} \item{\code{\link{sort_linter}}} diff --git a/man/robustness_linters.Rd b/man/robustness_linters.Rd index 499965308..18f12ca61 100644 --- a/man/robustness_linters.Rd +++ b/man/robustness_linters.Rd @@ -21,6 +21,7 @@ The following linters are tagged with 'robustness': \item{\code{\link{namespace_linter}}} \item{\code{\link{nonportable_path_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{strings_as_factors_linter}}} \item{\code{\link{T_and_F_symbol_linter}}} diff --git a/man/sample_int_linter.Rd b/man/sample_int_linter.Rd new file mode 100644 index 000000000..ed4c7a7fc --- /dev/null +++ b/man/sample_int_linter.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sample_int_linter.R +\name{sample_int_linter} +\alias{sample_int_linter} +\title{Require usage of sample.int(n, m, ...) over sample(1:n, m, ...)} +\usage{ +sample_int_linter() +} +\description{ +\code{\link[=sample.int]{sample.int()}} is preferable to \code{sample()} for the case of sampling numbers +between 1 and \code{n}. \code{sample} calls \code{sample.int()} "under the hood". +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability}, \link[=robustness_linters]{robustness} +} diff --git a/tests/testthat/test-sample_int_linter.R b/tests/testthat/test-sample_int_linter.R new file mode 100644 index 000000000..bf217cbee --- /dev/null +++ b/tests/testthat/test-sample_int_linter.R @@ -0,0 +1,95 @@ +test_that("sample_int_linter skips allowed usages", { + linter <- sample_int_linter() + + expect_lint("sample(n, m)", NULL, linter) + expect_lint("sample(n, m, TRUE)", NULL, linter) + expect_lint("sample(n, m, prob = 1:n/n)", NULL, linter) + expect_lint("sample(foo(x), m, TRUE)", NULL, linter) + expect_lint("sample(n, replace = TRUE)", NULL, linter) + + expect_lint("sample(10:1, m)", NULL, linter) +}) + +test_that("sample_int_linter blocks simple disallowed usages", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(1:n, m, ...).") + + expect_lint("sample(1:10, 2)", lint_msg, linter) + # also matches literal integer + expect_lint("sample(1L:10L, 2)", lint_msg, linter) + expect_lint("sample(1:n, 2)", lint_msg, linter) + expect_lint("sample(1:k, replace = TRUE)", lint_msg, linter) + expect_lint("sample(1:foo(x), prob = bar(x))", lint_msg, linter) +}) + +test_that("sample_int_linter blocks sample(seq_len(n), ...) as well", { + expect_lint( + "sample(seq_len(10), 2)", + rex::rex("sample.int(n, m, ...) is preferable to sample(seq_len(n), m, ...)."), + sample_int_linter() + ) +}) + +test_that("sample_int_linter blocks sample(seq(n)) and sample(seq(1, ...))", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(seq(n), m, ...).") + + expect_lint("sample(seq(n), 5)", lint_msg, linter) + expect_lint("sample(seq(1, 10), 5)", lint_msg, linter) + expect_lint("sample(seq(1, 10, by = 1), 5)", lint_msg, linter) + expect_lint("sample(seq(1L, 10, by = 1L), 5)", lint_msg, linter) + + # lint doesn't apply when by= is used (except when set to literal 1) + expect_lint("sample(seq(1, 10, by = 2), 5)", NULL, linter) + expect_lint("sample(seq(1, 10, by = n), 5)", NULL, linter) +}) + +test_that("sample_int_linter catches literal integer/numeric in the first arg", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).") + + expect_lint("sample(10L, 4)", lint_msg, linter) + expect_lint("sample(10, 5)", lint_msg, linter) +}) + +test_that("sample_int_linter skips TRUE or FALSE in the first argument", { + linter <- sample_int_linter() + + expect_lint("sample(replace = TRUE, letters)", NULL, linter) + expect_lint("sample(replace = FALSE, letters)", NULL, linter) +}) + +test_that("sample_int_linter skips x$sample() usage", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).") + + expect_lint("foo$sample(1L)", NULL, linter) + expect_lint("foo$sample(1:10)", NULL, linter) + expect_lint("foo$sample(seq_len(10L))", NULL, linter) + # ditto for '@' slot extraction + expect_lint("foo@sample(1L)", NULL, linter) + + # however, base::sample qualification is still caught + expect_lint("base::sample(10L)", lint_msg, linter) + + # but also, not everything "below" a $ extraction is skipped + expect_lint("foo$bar(sample(10L))", lint_msg, linter) +}) + +test_that("multiple lints are generated correctly", { + expect_lint( + trim_some("{ + sample(1:10, 2) + sample(10, 2) + sample(seq_len(10), 2) + sample(seq(10), 2) + }"), + list( + list(rex::rex("sample(1:n"), line_number = 2L, column_number = 3L), + list(rex::rex("sample(n"), line_number = 3L, column_number = 3L), + list(rex::rex("sample(seq_len(n)"), line_number = 4L, column_number = 3L), + list(rex::rex("sample(seq(n)"), line_number = 5L, column_number = 3L) + ), + sample_int_linter() + ) +})