From 277e14a55a05843ddcea735c57d4b4e5a13b888e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 8 Aug 2019 10:10:24 +0800 Subject: [PATCH] keep.rownames argument for transpose (#3715) --- NEWS.md | 2 + R/transpose.R | 29 ++++++++---- inst/tests/tests.Rraw | 16 ++++++- man/transpose.Rd | 5 +- src/transpose.c | 108 +++++++++++++++++++++--------------------- 5 files changed, 94 insertions(+), 66 deletions(-) diff --git a/NEWS.md b/NEWS.md index 030207708..f6a44b54d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -157,6 +157,8 @@ # data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4) ``` +22. `transpose` gains `keep.names=` and `make.names=` arguments, [#1886](https://github.com/Rdatatable/data.table/issues/1886). Previously, column names were dropped and there was no way to keep them. `keep.names="rn"` keeps the column names and puts them in the `"rn"` column of the result. Similarly, `make.names="rn"` uses column `"rn"` as the column names of the result. Both arguments are `NULL` by default for backwards compatibility. As these new arguments are new, they are subject to change in future according to community feedback. Thanks to @ghost for the request. + #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/R/transpose.R b/R/transpose.R index 5333dd788..2d275e0a8 100644 --- a/R/transpose.R +++ b/R/transpose.R @@ -1,12 +1,24 @@ -transpose = function(l, fill=NA, ignore.empty=FALSE) { - ans = .Call(Ctranspose, l, fill, ignore.empty) - if (is.data.table(l)) setDT(ans) - else if (is.data.frame(l)) { - if (is.null(names(ans))) - setattr(ans, "names", paste0("V", seq_along(ans))) - setattr(ans, "row.names", .set_row_names(length(ans[[1L]]))) - setattr(ans, "class", "data.frame") +transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) { + if (!is.null(make.names)) { + stopifnot(length(make.names)==1L) + if (is.character(make.names)) { + make.names=chmatch(make.names, names(l)) + if (is.na(make.names)) + stop("make.names='",make.names,"' not found in names of input") + } else { + make.names = as.integer(make.names) + if (is.na(make.names) || make.names<1L || make.names>length(l)) + stop("make.names=",make.names," is out of range [1,ncol=",length(l),"]") + } + colnames = as.character(l[[make.names]]) + l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names] } + ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names) + if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames)) + else if (is.data.frame(l)) # including data.table but not plain list + setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names))))) + if (is.data.table(l)) setDT(ans) + else if (is.data.frame(l)) setDF(ans) ans[] } @@ -34,3 +46,4 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { setattr(ans, 'names', names) ans } + diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ddecbf312..2a7d5d65e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6696,7 +6696,7 @@ test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed test(1477.08, transpose(1:5), error="l must be a list") test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type") test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is") -test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be NULL or length=1 vector") +test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector") test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE") test(1477.13, transpose(list()), list()) @@ -15509,6 +15509,20 @@ test(2072.066, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(letters[1:6 test(2072.067, fifelse(c(TRUE, NA, TRUE, FALSE, FALSE, FALSE), factor(NA), factor(NA)), factor(c(NA,NA,NA,NA,NA,NA))) +DT = data.table(x=1:5, y=6:10) +test(2073.1, transpose(DT, keep.names="rn"), + ans<-data.table(rn=c('x','y'), V1=c(1L, 6L), V2=c(2L, 7L), V3=c(3L, 8L), V4=c(4L, 9L), V5=c(5L, 10L))) +test(2073.2, transpose(DT, keep.names=TRUE), error="either NULL.*name of the first column of the result") +test(2073.3, transpose(ans, make.names="rn"), DT) +test(2073.4, transpose(ans, keep.names="rn", make.names="rn"), data.table(rn=paste0("V",1:5), x=1:5, y=6:10)) +L = list(a=1:3, rn=LETTERS[1:3], b=4:6) +test(2073.5, transpose(L, make.names=0), error="make.names=0 is out of range [1,ncol=3]") +test(2073.6, transpose(L, make.names=4), error="make.names=4 is out of range [1,ncol=3]") +test(2073.7, transpose(L, make.names=NA), error="make.names=NA is out of range [1,ncol=3]") +test(2073.8, transpose(L, make.names=2), list(A=INT(1,4), B=INT(2,5), C=INT(3,6))) +test(2073.9, transpose(L, make.names=2, keep.names='foo'), list(foo=c("a","b"), A=INT(1,4), B=INT(2,5), C=INT(3,6))) + + ################################### # Add new tests above this line # ################################### diff --git a/man/transpose.Rd b/man/transpose.Rd index 9281ca122..547688fd6 100644 --- a/man/transpose.Rd +++ b/man/transpose.Rd @@ -6,12 +6,14 @@ } \usage{ -transpose(l, fill=NA, ignore.empty=FALSE) +transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) } \arguments{ \item{l}{ A list, data.frame or data.table. } \item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so as to return each element of the transposed result of equal lengths. } \item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.} + \item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.} + \item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.} } \details{ The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result. @@ -21,7 +23,6 @@ transpose(l, fill=NA, ignore.empty=FALSE) This is particularly useful in tasks that require splitting a character column and assigning each part to a separate column. This operation is quite common enough that a function \code{\link{tstrsplit}} is exported. \code{factor} columns are converted to \code{character} type. Attributes are not preserved at the moment. This may change in the future. - } \value{ A transposed \code{list}, \code{data.frame} or \code{data.table}. diff --git a/src/transpose.c b/src/transpose.c index 717e95b99..567a9a80c 100644 --- a/src/transpose.c +++ b/src/transpose.c @@ -2,99 +2,97 @@ #include #include -SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) { +SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { + int nprotect=0; if (!isNewList(l)) error("l must be a list."); if (!length(l)) return(duplicate(l)); - if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL) + if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0]==NA_LOGICAL) error("ignore.empty should be logical TRUE/FALSE."); + bool ignore = LOGICAL(ignoreArg)[0]; + if (!(isNull(keepNamesArg) || (isString(keepNamesArg) && LENGTH(keepNamesArg)==1))) + error("keep.names should be either NULL, or the name of the first column of the result in which to place the names of the input"); + bool rn = !isNull(keepNamesArg); if (length(fill) != 1) - error("fill must be NULL or length=1 vector."); + error("fill must be a length 1 vector, such as the default NA"); R_len_t ln = LENGTH(l); - Rboolean ignore = LOGICAL(ignoreArg)[0]; // preprocessing - R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t)); int maxlen=0, zerolen=0; SEXPTYPE maxtype=0; for (int i=0; i maxlen) - maxlen = len[i]; - zerolen += (len[i] == 0); - if (isFactor(li)) { - maxtype = STRSXP; - } else { - SEXPTYPE type = TYPEOF(li); - if (type > maxtype) - maxtype = type; - } + const int len = length(li); + if (len>maxlen) maxlen=len; + zerolen += (len==0); + SEXPTYPE type = TYPEOF(li); + if (isFactor(li)) type=STRSXP; + if (type>maxtype) maxtype=type; } - // coerce fill to maxtype - fill = PROTECT(coerceVector(fill, maxtype)); + fill = PROTECT(coerceVector(fill, maxtype)); nprotect++; - // allocate 'ans' - SEXP ans = PROTECT(allocVector(VECSXP, maxlen)); - int anslen = (!ignore) ? ln : (ln - zerolen); + SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++; + int anslen = (ignore) ? (ln - zerolen) : ln; + if (rn) { + SEXP tt; + SET_VECTOR_ELT(ans, 0, tt=allocVector(STRSXP, anslen)); + SEXP lNames = PROTECT(getAttrib(l, R_NamesSymbol)); nprotect++; + for (int i=0, j=0; i