diff --git a/NEWS.md b/NEWS.md index b5f9cb61..f0d78a5d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,19 @@ # cpp11 (development version) +* Removed usage of the following non-API functions: + * `SETLENGTH()` + * `SET_TRUELENGTH()` + * `SET_GROWABLE_BIT()` + + These functions were used as part of the efficient growable vectors that + cpp11 offered, i.e. what happens under the hood when you use `push_back()`. + The removal of these non-API functions means that cpp11 writable vectors that + have been pushed to with `push_back()` will likely force 1 extra allocation + when the conversion from `cpp11::writable::r_vector` to `SEXP` occurs + (typically when you return a result back to R). This does not affect the + performance of `push_back()` itself, and in general these growable vectors + are still quite efficient (#362). + * Fixed a memory leak with the `cpp11::writable::r_vector` move assignment operator (#338). diff --git a/cpp11test/R/cpp11.R b/cpp11test/R/cpp11.R index da84b9ec..bb3153e3 100644 --- a/cpp11test/R/cpp11.R +++ b/cpp11test/R/cpp11.R @@ -224,6 +224,10 @@ rcpp_grow_ <- function(n_sxp) { .Call(`_cpp11test_rcpp_grow_`, n_sxp) } +rcpp_push_and_truncate_ <- function(size_sxp) { + .Call(`_cpp11test_rcpp_push_and_truncate_`, size_sxp) +} + test_destruction_inner <- function() { invisible(.Call(`_cpp11test_test_destruction_inner`)) } @@ -231,3 +235,7 @@ test_destruction_inner <- function() { test_destruction_outer <- function() { invisible(.Call(`_cpp11test_test_destruction_outer`)) } + +cpp11_push_and_truncate_ <- function(size_sexp) { + .Call(`_cpp11test_cpp11_push_and_truncate_`, size_sexp) +} diff --git a/cpp11test/bench/truncate.R b/cpp11test/bench/truncate.R new file mode 100644 index 00000000..a074e430 --- /dev/null +++ b/cpp11test/bench/truncate.R @@ -0,0 +1,20 @@ +pkgload::load_all("cpp11test") + +bench::press(len = as.integer(10 ^ (0:6)), + { + bench::mark( + cpp11 = cpp11_push_and_truncate_(len), + rcpp = rcpp_push_and_truncate_(len), + check = FALSE, + min_iterations = 1000 + ) + } +)[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")] + +# Longer benchmark, lots of gc +len <- as.integer(10 ^ 7) +bench::mark( + cpp11 = cpp11_push_and_truncate_(len), + rcpp = rcpp_push_and_truncate_(len), + min_iterations = 200 +)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")] diff --git a/cpp11test/src/cpp11.cpp b/cpp11test/src/cpp11.cpp index b18e23df..7cfa36c3 100644 --- a/cpp11test/src/cpp11.cpp +++ b/cpp11test/src/cpp11.cpp @@ -422,6 +422,13 @@ extern "C" SEXP _cpp11test_rcpp_grow_(SEXP n_sxp) { return cpp11::as_sexp(rcpp_grow_(cpp11::as_cpp>(n_sxp))); END_CPP11 } +// sum_rcpp.cpp +SEXP rcpp_push_and_truncate_(SEXP size_sxp); +extern "C" SEXP _cpp11test_rcpp_push_and_truncate_(SEXP size_sxp) { + BEGIN_CPP11 + return cpp11::as_sexp(rcpp_push_and_truncate_(cpp11::as_cpp>(size_sxp))); + END_CPP11 +} // test-protect-nested.cpp void test_destruction_inner(); extern "C" SEXP _cpp11test_test_destruction_inner() { @@ -438,6 +445,13 @@ extern "C" SEXP _cpp11test_test_destruction_outer() { return R_NilValue; END_CPP11 } +// truncate.cpp +SEXP cpp11_push_and_truncate_(SEXP size_sexp); +extern "C" SEXP _cpp11test_cpp11_push_and_truncate_(SEXP size_sexp) { + BEGIN_CPP11 + return cpp11::as_sexp(cpp11_push_and_truncate_(cpp11::as_cpp>(size_sexp))); + END_CPP11 +} extern "C" { /* .Call calls */ @@ -447,6 +461,7 @@ static const R_CallMethodDef CallEntries[] = { {"_cpp11test_col_sums", (DL_FUNC) &_cpp11test_col_sums, 1}, {"_cpp11test_cpp11_add_vec_for_", (DL_FUNC) &_cpp11test_cpp11_add_vec_for_, 2}, {"_cpp11test_cpp11_insert_", (DL_FUNC) &_cpp11test_cpp11_insert_, 1}, + {"_cpp11test_cpp11_push_and_truncate_", (DL_FUNC) &_cpp11test_cpp11_push_and_truncate_, 1}, {"_cpp11test_cpp11_release_", (DL_FUNC) &_cpp11test_cpp11_release_, 1}, {"_cpp11test_cpp11_safe_", (DL_FUNC) &_cpp11test_cpp11_safe_, 1}, {"_cpp11test_data_frame_", (DL_FUNC) &_cpp11test_data_frame_, 0}, @@ -481,6 +496,7 @@ static const R_CallMethodDef CallEntries[] = { {"_cpp11test_protect_one_preserve_", (DL_FUNC) &_cpp11test_protect_one_preserve_, 2}, {"_cpp11test_protect_one_sexp_", (DL_FUNC) &_cpp11test_protect_one_sexp_, 2}, {"_cpp11test_rcpp_grow_", (DL_FUNC) &_cpp11test_rcpp_grow_, 1}, + {"_cpp11test_rcpp_push_and_truncate_", (DL_FUNC) &_cpp11test_rcpp_push_and_truncate_, 1}, {"_cpp11test_rcpp_release_", (DL_FUNC) &_cpp11test_rcpp_release_, 1}, {"_cpp11test_rcpp_sum_dbl_accumulate_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_accumulate_, 1}, {"_cpp11test_rcpp_sum_dbl_for_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_for_, 1}, diff --git a/cpp11test/src/sum_rcpp.cpp b/cpp11test/src/sum_rcpp.cpp index 47a5c0e9..035edef7 100644 --- a/cpp11test/src/sum_rcpp.cpp +++ b/cpp11test/src/sum_rcpp.cpp @@ -56,3 +56,15 @@ return x; } + +[[cpp11::register]] SEXP rcpp_push_and_truncate_(SEXP size_sxp) { + R_xlen_t size = INTEGER(size_sxp)[0]; + + // Allocate `size` worth of doubles (filled with garbage data) + Rcpp::NumericVector out(size); + + // Push 1 more past the existing capacity + out.push_back(0); + + return out; +} diff --git a/cpp11test/src/truncate.cpp b/cpp11test/src/truncate.cpp new file mode 100644 index 00000000..0e065f1c --- /dev/null +++ b/cpp11test/src/truncate.cpp @@ -0,0 +1,15 @@ +#include "cpp11/doubles.hpp" + +[[cpp11::register]] SEXP cpp11_push_and_truncate_(SEXP size_sexp) { + R_xlen_t size = INTEGER(size_sexp)[0]; + + // Allocate `size` worth of doubles (filled with garbage data) + cpp11::writable::doubles out(size); + + // Push 1 more past the existing length/capacity, + // doubling the capacity for cpp11 vectors + out.push_back(0); + + // Truncate back to `size + 1` size and return result. + return SEXP(out); +} diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 90758d32..655f7229 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -897,33 +897,30 @@ inline void r_vector::clear() { length_ = 0; } -inline SEXP truncate(SEXP x, R_xlen_t length, R_xlen_t capacity) { -#if R_VERSION >= R_Version(3, 4, 0) - SETLENGTH(x, length); - SET_TRUELENGTH(x, capacity); - SET_GROWABLE_BIT(x); -#else - x = safe[Rf_lengthgets](x, length); -#endif - return x; -} - template inline r_vector::operator SEXP() const { + // Throwing away the const-ness is a bit gross, but we only modify + // internal details here, and updating the internal data after we resize allows + // statements like `Rf_setAttrib(, name, value)` to make sense, where + // people expect that the SEXP inside the `` gets the updated attribute. auto* p = const_cast*>(this); + if (data_ == R_NilValue) { + // Specially call out the `NULL` case, which can occur if immediately + // returning a default constructed writable `r_vector` as a `SEXP`. p->resize(0); return data_; } + if (length_ < capacity_) { - p->data_ = truncate(p->data_, length_, capacity_); - SEXP nms = names(); - auto nms_size = Rf_xlength(nms); - if ((nms_size > 0) && (length_ < nms_size)) { - nms = truncate(nms, length_, capacity_); - names() = nms; - } + // Truncate the vector to its `length_`. This unfortunately typically forces + // an allocation if the user has called `push_back()` on a writable + // `r_vector`. Importantly, going through `resize()` updates: `data_` and + // protection of it, `data_p_`, and `capacity_`. + p->resize(length_); + return data_; } + return data_; } diff --git a/vignettes/motivations.Rmd b/vignettes/motivations.Rmd index 67391d61..9f5ffb0f 100644 --- a/vignettes/motivations.Rmd +++ b/vignettes/motivations.Rmd @@ -487,7 +487,8 @@ b_grow <- bench::press(.grid = grid, { fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_")))) bench::mark( - fun(len) + fun(len), + min_iterations = 100 ) } )[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")]