diff --git a/.Rbuildignore b/.Rbuildignore index a6dcd60..fd89c3a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,4 +7,7 @@ ^pkgdown$ ^\.github$ ^vignettes/articles$ +^compile_commands\.json$ +.clang-format +^.cache$ ^codecov\.yml$ diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..7786f44 --- /dev/null +++ b/.clang-format @@ -0,0 +1,12 @@ +BasedOnStyle: Google +IndentWidth: 2 +DerivePointerAlignment: false +PointerAlignment: Left +ColumnLimit: 80 +AlignAfterOpenBracket: BlockIndent +SpaceAfterCStyleCast: true +IncludeBlocks: Regroup +AllowShortFunctionsOnASingleLine: Empty +BinPackArguments: false +BinPackParameters: false +AllowAllParametersOfDeclarationOnNextLine: false \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9168bf8..e2b322b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ .httr-oauth .DS_Store docs +compile_commands.json +.cache \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 40d25de..d561260 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,6 +9,8 @@ License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 +Depends: + R (>= 4.0.0) Imports: Matrix, tibble, @@ -17,5 +19,7 @@ URL: https://github.com/EmilHvitfeldt/sparsevctrs, https://emilhvitfeldt.github. BugReports: https://github.com/EmilHvitfeldt/sparsevctrs/issues Suggests: rlang, - rsparse + rsparse, + testthat (>= 3.0.0) Config/Needs/website: rmarkdown +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 51f82d5..25adadb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,10 @@ S3method(format,sparse_vector) S3method(vec_arith,sparse_vector) S3method(vec_math,sparse_vector) S3method(vec_ptype_abbr,sparse_vector) +export(new_sparse_real) export(new_sparse_vector) export(sparse_to_tibble) export(sparse_vector) export(tibble_to_sparse) import(vctrs) +useDynLib(sparsevctrs, .registration = TRUE) diff --git a/R/altrep.R b/R/altrep.R new file mode 100644 index 0000000..6a25353 --- /dev/null +++ b/R/altrep.R @@ -0,0 +1,12 @@ +#' Create sparse numeric vector +#' +#' @param value Numeric vector, values of non-zero entries. +#' @param position integer vector, indices of non-zero entries. +#' @param length Integer, Length of vector. +#' +#' @export +new_sparse_real <- function(value, position, length) { + x <- list(val = value, pos = as.integer(position), length = as.integer(length)) + + .Call(ffi_altrep_new_sparse_real, x) +} diff --git a/R/sparsevctrs-package.R b/R/sparsevctrs-package.R index 9cf920c..2f0ec6e 100644 --- a/R/sparsevctrs-package.R +++ b/R/sparsevctrs-package.R @@ -9,5 +9,6 @@ NULL ## usethis namespace: start +#' @useDynLib sparsevctrs, .registration = TRUE ## usethis namespace: end NULL diff --git a/man/new_sparse_real.Rd b/man/new_sparse_real.Rd new file mode 100644 index 0000000..9a11932 --- /dev/null +++ b/man/new_sparse_real.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/altrep.R +\name{new_sparse_real} +\alias{new_sparse_real} +\title{Create sparse numeric vector} +\usage{ +new_sparse_real(value, position, length) +} +\arguments{ +\item{value}{Numeric vector, values of non-zero entries.} + +\item{position}{integer vector, indices of non-zero entries.} + +\item{length}{Integer, Length of vector.} +} +\description{ +Create sparse numeric vector +} diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/altrep-sparse-real.c b/src/altrep-sparse-real.c new file mode 100644 index 0000000..3d55ab7 --- /dev/null +++ b/src/altrep-sparse-real.c @@ -0,0 +1,180 @@ +#define R_NO_REMAP +#include "R.h" +#include +#include + +// Initialised at load time +R_altrep_class_t altrep_sparse_real_class; + +SEXP ffi_altrep_new_sparse_real(SEXP x) { + return R_new_altrep(altrep_sparse_real_class, x, R_NilValue); +} + +SEXP alrep_sparse_real_Materialize(SEXP vec) { + SEXP out = R_altrep_data2(vec); + + if (out != R_NilValue) { + return out; + } + + SEXP data1 = R_altrep_data1(vec); + SEXP val = VECTOR_ELT(data1, 0); + SEXP pos = VECTOR_ELT(data1, 1); + SEXP len = VECTOR_ELT(data1, 2); + + R_xlen_t c_len = (R_xlen_t) INTEGER_ELT(len, 0); + + out = PROTECT(Rf_allocVector(REALSXP, c_len)); + + // Reminder about performance + for (R_xlen_t i = 0; i < c_len; ++i) { + SET_REAL_ELT(out, i, 0); + } + + R_xlen_t n_positions = Rf_xlength(pos); + + // Reminder about performance + for (R_xlen_t i = 0; i < n_positions; ++i) { + SET_REAL_ELT(out, INTEGER_ELT(pos, i) - 1, REAL_ELT(val, i)); + } + + R_set_altrep_data2(vec, out); + + UNPROTECT(1); + return out; +} + +// ----------------------------------------------------------------------------- +// ALTVEC + +void* altrep_sparse_real_Dataptr(SEXP x, Rboolean writeable) { + return STDVEC_DATAPTR(alrep_sparse_real_Materialize(x)); +} + +const void* altrep_sparse_real_Dataptr_or_null(SEXP vec) { + SEXP out = R_altrep_data2(vec); + + if (out == R_NilValue) { + return NULL; + } else { + return out; + } +} + +static SEXP altrep_sparse_real_Extract_subset(SEXP x, SEXP indx, SEXP call) { + + SEXP data1 = R_altrep_data1(x); + SEXP val_old = VECTOR_ELT(data1, 0); + SEXP pos_old = VECTOR_ELT(data1, 1); + SEXP matches = PROTECT(Rf_match(pos_old, indx, R_NaInt)); + + int n = 0; + + for (int i = 0; i < Rf_length(matches); ++i) { + if (INTEGER_ELT(matches, i) != R_NaInt) { + n++; + } + } + + SEXP val_new = PROTECT(Rf_allocVector(REALSXP, n)); + SEXP pos_new = PROTECT(Rf_allocVector(INTSXP, n)); + + int step = 0; + int what_pos = 1; + + for (int i = 0; i < Rf_length(matches); ++i) { + + int match = INTEGER_ELT(matches, i); + if (match != R_NaInt) { + SET_REAL_ELT(val_new, step, REAL_ELT(val_old, match - 1)); + + for (int j = 0; j < Rf_length(matches); ++j) { + if (INTEGER_ELT(indx, j) == INTEGER_ELT(pos_old, match - 1)) { + break; + } else { + what_pos++; + } + } + SET_INTEGER_ELT(pos_new, step, what_pos); + what_pos = 1; + step++; + } + } + + const char *names[] = {"val", "pos", "length", ""}; + SEXP res = PROTECT(Rf_mkNamed(VECSXP, names)); + SET_VECTOR_ELT(res, 0, val_new); + SET_VECTOR_ELT(res, 1, pos_new); + SET_VECTOR_ELT(res, 2, Rf_ScalarInteger(Rf_length(matches))); + + UNPROTECT(4); + + return ffi_altrep_new_sparse_real(res); +} + +// ----------------------------------------------------------------------------- +// ALTREP + +R_xlen_t altrep_sparse_real_Length(SEXP x) { + SEXP data = R_altrep_data1(x); + SEXP len = VECTOR_ELT(data, 2); + R_xlen_t out = (R_xlen_t) INTEGER_ELT(len, 0); + return out; +} + +// What gets printed when .Internal(inspect()) is used +Rboolean altrep_sparse_real_Inspect(SEXP x, + int pre, + int deep, + int pvec, + void (*inspect_subtree)(SEXP, int, int, int)) { + Rprintf("sparsevctrs_altrep_sparse_real (materialized=%s)\n", + R_altrep_data2(x) != R_NilValue ? "T" : "F"); + return TRUE; +} + +// ----------------------------------------------------------------------------- +// ALTREAL + +static double altrep_sparse_real_Elt(SEXP x, R_xlen_t i) { + SEXP data1 = R_altrep_data1(x); + SEXP val = VECTOR_ELT(data1, 0); + SEXP pos = VECTOR_ELT(data1, 1); + SEXP len = VECTOR_ELT(data1, 2); + R_xlen_t c_len = (R_xlen_t) INTEGER_ELT(len, 0); + + if (i > c_len) { + return NA_REAL; + } + + const R_xlen_t n = Rf_xlength(val); + + double out = 0; + + for (int j = 0; j < n; ++j) { + if (INTEGER_ELT(pos, j) == i + 1) { + out = REAL_ELT(val, j); + break; + } + } + + return out; +} + +// ----------------------------------------------------------------------------- + +void sparsevctrs_init_altrep_sparse_real(DllInfo* dll) { + altrep_sparse_real_class = R_make_altreal_class("altrep_sparse_real", "sparsevctrs", dll); + + // ALTVEC + R_set_altvec_Dataptr_method(altrep_sparse_real_class, altrep_sparse_real_Dataptr); + R_set_altvec_Dataptr_or_null_method(altrep_sparse_real_class, altrep_sparse_real_Dataptr_or_null); + R_set_altvec_Extract_subset_method(altrep_sparse_real_class, altrep_sparse_real_Extract_subset); + + // ALTREP + R_set_altrep_Length_method(altrep_sparse_real_class, altrep_sparse_real_Length); + R_set_altrep_Inspect_method(altrep_sparse_real_class, altrep_sparse_real_Inspect); + + // ALTREAL + R_set_altreal_Elt_method(altrep_sparse_real_class, altrep_sparse_real_Elt); +} diff --git a/src/init.c b/src/init.c new file mode 100644 index 0000000..12a160d --- /dev/null +++ b/src/init.c @@ -0,0 +1,18 @@ +#include + +// Defined in altrep-sparse-real.c +extern SEXP ffi_altrep_new_sparse_real(SEXP); +extern void sparsevctrs_init_altrep_sparse_real(DllInfo*); + +static const R_CallMethodDef CallEntries[] = { + {"ffi_altrep_new_sparse_real", (DL_FUNC) &ffi_altrep_new_sparse_real, 1}, + {NULL, NULL, 0} +}; + +void R_init_sparsevctrs(DllInfo* dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); + + // altrep classes + sparsevctrs_init_altrep_sparse_real(dll); +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..93d5488 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(sparsevctrs) + +test_check("sparsevctrs") diff --git a/tests/testthat/test-altrep.R b/tests/testthat/test-altrep.R new file mode 100644 index 0000000..4ab24f3 --- /dev/null +++ b/tests/testthat/test-altrep.R @@ -0,0 +1,43 @@ +test_that("length() works with new_sparse_real()", { + expect_identical( + length(new_sparse_real(numeric(), integer(), 0)), + 0L + ) + + expect_identical( + length(new_sparse_real(1, 1, 10)), + 10L + ) + + expect_identical( + length(new_sparse_real(1, 1, 100)), + 100L + ) +}) + +test_that("subsetting works with new_sparse_real()", { + x_sparse <- new_sparse_real(value = c(10, 13, 20), position = c(1, 5, 8), 10) + x_dense <- c(10, 0, 0, 0, 13, 0, 0, 20, 0, 0) + + for (i in seq_len(10)) { + expect_identical(x_sparse[i], x_dense[i]) + } + + expect_identical(x_sparse[1:2], x_dense[1:2]) + + expect_identical(x_sparse[3:7], x_dense[3:7]) + + expect_identical(x_sparse[-5], x_dense[-5]) + + expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) + + # testing outside range returns NA + # expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) +}) + +test_that("materialization works with new_sparse_real()", { + x_sparse <- new_sparse_real(value = c(10, 13, 20), position = c(1, 5, 8), 10) + x_dense <- c(10, 0, 0, 0, 13, 0, 0, 20, 0, 0) + + expect_identical(x_sparse[], x_dense) +}) \ No newline at end of file