diff --git a/DESCRIPTION b/DESCRIPTION index efd67c0..7ba11ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,4 +21,4 @@ Config/Needs/website: rmarkdown Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.1.9000 diff --git a/NAMESPACE b/NAMESPACE index 4ede74c..e01d848 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,9 @@ # Generated by roxygen2: do not edit by hand +export(is_sparse_double) +export(is_sparse_vector) export(sparse_double) +export(sparse_positions) +export(sparse_values) import(rlang) useDynLib(sparsevctrs, .registration = TRUE) diff --git a/R/altrep.R b/R/altrep.R index bf288c7..9744fea 100644 --- a/R/altrep.R +++ b/R/altrep.R @@ -1,7 +1,7 @@ #' Create sparse double vector #' -#' @param value Numeric vector, values of non-zero entries. -#' @param position integer vector, indices of non-zero entries. +#' @param values Numeric vector, values of non-zero entries. +#' @param positions integer vector, indices of non-zero entries. #' @param length Integer, Length of vector. #' #' @details @@ -20,142 +20,131 @@ #' sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000) #' ) #' @export -sparse_double <- function(value, position, length) { +sparse_double <- function(values, positions, length) { check_number_whole(length, min = 0) if (!is.integer(length)) { length <- as.integer(length) } - if (!is.numeric(value)) { + if (!is.numeric(values)) { cli::cli_abort( - "{.arg value} must be a numeric vector, not {.obj_type_friendly {value}}." + "{.arg values} must be a numeric vector, \\ + not {.obj_type_friendly {values}}." ) } - if (any(is.infinite(value))) { - offenders <- which(is.infinite(value)) + if (any(is.infinite(values))) { + offenders <- which(is.infinite(values)) cli::cli_abort( c( - x = "{.arg value} must not contain infinite values.", + x = "{.arg values} must not contain infinite values.", i = "Infinite values at index: {offenders}." ) ) } - if (is.integer(value)) { - value <- as.double(value) + if (is.integer(values)) { + values <- as.double(values) } - if (!is.numeric(position)) { + if (!is.numeric(positions)) { cli::cli_abort( - "{.arg position} must be a integer vector, \\ - not {.obj_type_friendly {value}}." + "{.arg positions} must be a integer vector, \\ + not {.obj_type_friendly {positions}}." ) } - if (any(is.infinite(position))) { - offenders <- which(is.infinite(position)) + if (any(is.infinite(positions))) { + offenders <- which(is.infinite(positions)) cli::cli_abort( c( - x = "{.arg position} must not contain infinite values.", + x = "{.arg positions} must not contain infinite values.", i = "Infinite values at index: {offenders}." ) ) } - if (!is.integer(position)) { - if (any(round(position) != position, na.rm = TRUE)) { - offenders <- which(round(position) != position) + if (!is.integer(positions)) { + if (any(round(positions) != positions, na.rm = TRUE)) { + offenders <- which(round(positions) != positions) cli::cli_abort( c( - x = "{.arg position} must contain integer values.", + x = "{.arg positions} must contain integer values.", i = "Non-integer values at index: {offenders}." ) ) } - position <- as.integer(position) + positions <- as.integer(positions) } - len_value <- length(value) - len_position <- length(position) + len_values <- length(values) + len_positions <- length(positions) - if (len_value != len_position) { + if (len_values != len_positions) { cli::cli_abort( - "{.arg value} ({len_value}) and {.arg position} ({len_position}) \\ + "{.arg value} ({len_values}) and {.arg positions} ({len_positions}) \\ must have the same length." ) } - if (anyDuplicated(position) > 0) { - offenders <- which(duplicated(position)) + if (anyDuplicated(positions) > 0) { + offenders <- which(duplicated(positions)) cli::cli_abort( c( - x = "{.arg position} must not contain any duplicate values.", + x = "{.arg positions} must not contain any duplicate values.", i = "Duplicate values at index: {offenders}." ) ) } - if (is.unsorted(position)) { + if (is.unsorted(positions)) { cli::cli_abort( - "{.arg position} must be sorted in increasing order." + "{.arg positions} must be sorted in increasing order." ) } - if (len_position > 0 && max(position) > length) { - offenders <- which(position > length) + if (len_positions > 0 && max(positions) > length) { + offenders <- which(positions > length) cli::cli_abort( c( - x = "{.arg position} value must not be larger than {.arg length}.", + x = "{.arg positions} value must not be larger than {.arg length}.", i = "Offending values at index: {offenders}." ) ) } - if (len_position > 0 && min(position) < 1) { - offenders <- which(position < 1) + if (len_positions > 0 && min(positions) < 1) { + offenders <- which(positions < 1) cli::cli_abort( c( - x = "{.arg position} value must positive.", + x = "{.arg positions} value must positive.", i = "Non-positive values at index: {offenders}." ) ) } - if (any(value == 0)) { - offenders <- which(value == 0) + if (any(values == 0)) { + offenders <- which(values == 0) cli::cli_abort( c( - x = "{.arg value} value must not be 0.", + x = "{.arg values} value must not be 0.", i = "0 values at index: {offenders}." ) ) } - new_sparse_double(value, position, length) + new_sparse_double(values, positions, length) } -new_sparse_double <- function(value, position, length) { +new_sparse_double <- function(values, positions, length) { x <- list( - val = value, - pos = position, + val = values, + pos = positions, len = length ) .Call(ffi_altrep_new_sparse_double, x) } - -is_sparse_vector <- function(x) { - res <- .Call(ffi_extract_altrep_class, x) - if (is.null(res)) { - return(FALSE) - } - - res <- as.character(res[[1]]) - - res %in% c("altrep_sparse_double") - } - \ No newline at end of file diff --git a/R/extractors.R b/R/extractors.R index c1ce279..c586cd6 100644 --- a/R/extractors.R +++ b/R/extractors.R @@ -1,4 +1,28 @@ -.positions <- function(x) { +#' Information extraction from sparse vectors +#' +#' Extract positions and values from sparse vectors without the need to +#' materialize vector. +#' +#' @param x vector to be extracted from. +#' +#' @details +#' for ease of use, these functions also works on non-sparse variables. +#' +#' @examples +#' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) +#' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) +#' +#' sparse_positions(x_sparse) +#' sparse_values(x_sparse) +#' +#' sparse_positions(x_dense) +#' sparse_values(x_dense) +#' @name extractors +NULL + +#' @rdname extractors +#' @export +sparse_positions <- function(x) { if (!is_sparse_vector(x)) { return(seq_along(x)) } @@ -6,7 +30,9 @@ .Call(ffi_altrep_sparse_positions, x) } -.values <- function(x) { +#' @rdname extractors +#' @export +sparse_values <- function(x) { if (!is_sparse_vector(x)) { return(x) } diff --git a/R/type-predicates.R b/R/type-predicates.R new file mode 100644 index 0000000..0d75599 --- /dev/null +++ b/R/type-predicates.R @@ -0,0 +1,41 @@ +#' Sparse vector type checkers +#' +#' @param x value to be checked. +#' +#' @examples +#' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) +#' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) +#' +#' is_sparse_vector(x_sparse) +#' is_sparse_vector(x_dense) +#' +#' # Forced materialization +#' is_sparse_vector(x_sparse[]) +#' @name type-predicates +NULL + +#' @rdname type-predicates +#' @export +is_sparse_vector <- function(x) { + res <- .Call(ffi_extract_altrep_class, x) + if (is.null(res)) { + return(FALSE) + } + + res <- as.character(res[[1]]) + + res %in% c("altrep_sparse_double") +} + +#' @rdname type-predicates +#' @export +is_sparse_double <- function(x) { + res <- .Call(ffi_extract_altrep_class, x) + if (is.null(res)) { + return(FALSE) + } + + res <- as.character(res[[1]]) + + res == "altrep_sparse_double" +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 616a3e9..9acf5cc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -2,3 +2,12 @@ url: https://emilhvitfeldt.github.io/sparsevctrs/ template: bootstrap: 5 +reference: + - title: Create Sparse Vectors + contents: + - sparse_double + + - title: Helper Functions + contents: + - type-predicates + - extractors diff --git a/man/extractors.Rd b/man/extractors.Rd new file mode 100644 index 0000000..f4e8ff9 --- /dev/null +++ b/man/extractors.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extractors.R +\name{extractors} +\alias{extractors} +\alias{sparse_positions} +\alias{sparse_values} +\title{Information extraction from sparse vectors} +\usage{ +sparse_positions(x) + +sparse_values(x) +} +\arguments{ +\item{x}{vector to be extracted from.} +} +\description{ +Extract positions and values from sparse vectors without the need to +materialize vector. +} +\details{ +for ease of use, these functions also works on non-sparse variables. +} +\examples{ +x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) +x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) + +sparse_positions(x_sparse) +sparse_values(x_sparse) + +sparse_positions(x_dense) +sparse_values(x_dense) +} diff --git a/man/sparse_double.Rd b/man/sparse_double.Rd index 919b7aa..739030a 100644 --- a/man/sparse_double.Rd +++ b/man/sparse_double.Rd @@ -4,12 +4,12 @@ \alias{sparse_double} \title{Create sparse double vector} \usage{ -sparse_double(value, position, length) +sparse_double(values, positions, length) } \arguments{ -\item{value}{Numeric vector, values of non-zero entries.} +\item{values}{Numeric vector, values of non-zero entries.} -\item{position}{integer vector, indices of non-zero entries.} +\item{positions}{integer vector, indices of non-zero entries.} \item{length}{Integer, Length of vector.} } diff --git a/man/type-predicates.Rd b/man/type-predicates.Rd new file mode 100644 index 0000000..dc5a19f --- /dev/null +++ b/man/type-predicates.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/type-predicates.R +\name{type-predicates} +\alias{type-predicates} +\alias{is_sparse_vector} +\alias{is_sparse_double} +\title{Sparse vector type checkers} +\usage{ +is_sparse_vector(x) + +is_sparse_double(x) +} +\arguments{ +\item{x}{value to be checked.} +} +\description{ +Sparse vector type checkers +} +\examples{ +x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) +x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) + +is_sparse_vector(x_sparse) +is_sparse_vector(x_dense) + +# Forced materialization +is_sparse_vector(x_sparse[]) +} diff --git a/tests/testthat/_snaps/altrep.md b/tests/testthat/_snaps/altrep.md index 1a22493..ee435f3 100644 --- a/tests/testthat/_snaps/altrep.md +++ b/tests/testthat/_snaps/altrep.md @@ -4,7 +4,7 @@ sparse_double("1", 1, 1) Condition Error in `sparse_double()`: - ! `value` must be a numeric vector, not a string. + ! `values` must be a numeric vector, not a string. --- @@ -12,7 +12,7 @@ sparse_double(NULL, 1, 1) Condition Error in `sparse_double()`: - ! `value` must be a numeric vector, not NULL. + ! `values` must be a numeric vector, not NULL. --- @@ -20,7 +20,7 @@ sparse_double(NA, 1, 1) Condition Error in `sparse_double()`: - ! `value` must be a numeric vector, not `NA`. + ! `values` must be a numeric vector, not `NA`. --- @@ -28,7 +28,7 @@ sparse_double(Inf, 1, 1) Condition Error in `sparse_double()`: - x `value` must not contain infinite values. + x `values` must not contain infinite values. i Infinite values at index: 1. --- @@ -36,7 +36,7 @@ Code sparse_double(NaN, 1, 1) Condition - Error in `if (any(value == 0)) ...`: + Error in `if (any(values == 0)) ...`: ! missing value where TRUE/FALSE needed --- @@ -45,7 +45,7 @@ sparse_double(1, 1.5, 1) Condition Error in `sparse_double()`: - x `position` must contain integer values. + x `positions` must contain integer values. i Non-integer values at index: 1. --- @@ -54,7 +54,7 @@ sparse_double(1, "1", 1) Condition Error in `sparse_double()`: - ! `position` must be a integer vector, not a number. + ! `positions` must be a integer vector, not a string. --- @@ -62,7 +62,7 @@ sparse_double(1, NULL, 1) Condition Error in `sparse_double()`: - ! `position` must be a integer vector, not a number. + ! `positions` must be a integer vector, not NULL. --- @@ -70,7 +70,7 @@ sparse_double(1, NA, 1) Condition Error in `sparse_double()`: - ! `position` must be a integer vector, not a number. + ! `positions` must be a integer vector, not `NA`. --- @@ -78,7 +78,7 @@ sparse_double(1, Inf, 1) Condition Error in `sparse_double()`: - x `position` must not contain infinite values. + x `positions` must not contain infinite values. i Infinite values at index: 1. --- @@ -86,7 +86,7 @@ Code sparse_double(1, NaN, 1) Condition - Error in `if (len_position > 0 && max(position) > length) ...`: + Error in `if (len_positions > 0 && max(positions) > length) ...`: ! missing value where TRUE/FALSE needed --- @@ -151,7 +151,7 @@ sparse_double(1:4, 1:6, 10) Condition Error in `sparse_double()`: - ! `value` (4) and `position` (6) must have the same length. + ! `value` (4) and `positions` (6) must have the same length. --- @@ -159,7 +159,7 @@ sparse_double(1, 1:6, 10) Condition Error in `sparse_double()`: - ! `value` (1) and `position` (6) must have the same length. + ! `value` (1) and `positions` (6) must have the same length. --- @@ -167,7 +167,7 @@ sparse_double(1:4, c(1, 1, 5, 6), 10) Condition Error in `sparse_double()`: - x `position` must not contain any duplicate values. + x `positions` must not contain any duplicate values. i Duplicate values at index: 2. --- @@ -176,7 +176,7 @@ sparse_double(1:100, rep(1, 100), 100) Condition Error in `sparse_double()`: - x `position` must not contain any duplicate values. + x `positions` must not contain any duplicate values. i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100. --- @@ -185,7 +185,7 @@ sparse_double(c(1, 2), c(3, 1), 5) Condition Error in `sparse_double()`: - ! `position` must be sorted in increasing order. + ! `positions` must be sorted in increasing order. --- @@ -193,7 +193,7 @@ sparse_double(1, 10, 5) Condition Error in `sparse_double()`: - x `position` value must not be larger than `length`. + x `positions` value must not be larger than `length`. i Offending values at index: 1. --- @@ -202,7 +202,7 @@ sparse_double(rep(1, 50), seq(25, 74), 50) Condition Error in `sparse_double()`: - x `position` value must not be larger than `length`. + x `positions` value must not be larger than `length`. i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. --- @@ -211,7 +211,7 @@ sparse_double(1, 0, 5) Condition Error in `sparse_double()`: - x `position` value must positive. + x `positions` value must positive. i Non-positive values at index: 1. --- @@ -220,7 +220,7 @@ sparse_double(rep(1, 101), seq(-50, 50), 100) Condition Error in `sparse_double()`: - x `position` value must positive. + x `positions` value must positive. i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. --- @@ -229,7 +229,7 @@ sparse_double(0, 1, 10) Condition Error in `sparse_double()`: - x `value` value must not be 0. + x `values` value must not be 0. i 0 values at index: 1. --- @@ -238,7 +238,7 @@ sparse_double(rep(c(1, 0), 5), 1:10, 50) Condition Error in `sparse_double()`: - x `value` value must not be 0. + x `values` value must not be 0. i 0 values at index: 2, 4, 6, 8, and 10. # verbose testing diff --git a/tests/testthat/test-extractors.R b/tests/testthat/test-extractors.R index e6aa5a5..7148971 100644 --- a/tests/testthat/test-extractors.R +++ b/tests/testthat/test-extractors.R @@ -1,47 +1,47 @@ -test_that(".positions works with altrep_sparse_double", { +test_that("sparse_positions works with altrep_sparse_double", { expect_identical( - .positions(sparse_double(1, 5, 10)), + sparse_positions(sparse_double(1, 5, 10)), 5L ) expect_identical( - .positions(sparse_double(1:3, 5:7, 10)), + sparse_positions(sparse_double(1:3, 5:7, 10)), 5:7 ) }) -test_that(".positions works with numeric vectors", { +test_that("sparse_positions works with numeric vectors", { expect_identical( - .positions(c(1, 6, 4, 2)), + sparse_positions(c(1, 6, 4, 2)), seq_len(4) ) expect_identical( - .positions(101:200), + sparse_positions(101:200), 1:100 ) }) -test_that(".values works with altrep_sparse_double", { +test_that("sparse_values works with altrep_sparse_double", { expect_identical( - .values(sparse_double(1, 5, 10)), + sparse_values(sparse_double(1, 5, 10)), 1 ) expect_identical( - .values(sparse_double(1:3, 5:7, 10)), + sparse_values(sparse_double(1:3, 5:7, 10)), c(1, 2, 3) ) }) -test_that(".values works with numeric vectors", { +test_that("sparse_values works with numeric vectors", { expect_identical( - .values(c(1, 6, 4, 2)), + sparse_values(c(1, 6, 4, 2)), c(1, 6, 4, 2) ) expect_identical( - .values(101:200), + sparse_values(101:200), 101:200 ) })