Skip to content

Commit

Permalink
keep.rownames argument for transpose (#3715)
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chirico authored and mattdowle committed Aug 8, 2019
1 parent a8e0230 commit 277e14a
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 66 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@
# data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4)
```
22. `transpose` gains `keep.names=` and `make.names=` arguments, [#1886](https://github.com/Rdatatable/data.table/issues/1886). Previously, column names were dropped and there was no way to keep them. `keep.names="rn"` keeps the column names and puts them in the `"rn"` column of the result. Similarly, `make.names="rn"` uses column `"rn"` as the column names of the result. Both arguments are `NULL` by default for backwards compatibility. As these new arguments are new, they are subject to change in future according to community feedback. Thanks to @ghost for the request.
#### BUG FIXES
1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
29 changes: 21 additions & 8 deletions R/transpose.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
transpose = function(l, fill=NA, ignore.empty=FALSE) {
ans = .Call(Ctranspose, l, fill, ignore.empty)
if (is.data.table(l)) setDT(ans)
else if (is.data.frame(l)) {
if (is.null(names(ans)))
setattr(ans, "names", paste0("V", seq_along(ans)))
setattr(ans, "row.names", .set_row_names(length(ans[[1L]])))
setattr(ans, "class", "data.frame")
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) {
if (!is.null(make.names)) {
stopifnot(length(make.names)==1L)
if (is.character(make.names)) {
make.names=chmatch(make.names, names(l))
if (is.na(make.names))
stop("make.names='",make.names,"' not found in names of input")
} else {
make.names = as.integer(make.names)
if (is.na(make.names) || make.names<1L || make.names>length(l))
stop("make.names=",make.names," is out of range [1,ncol=",length(l),"]")
}
colnames = as.character(l[[make.names]])
l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names]
}
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names)
if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames))
else if (is.data.frame(l)) # including data.table but not plain list
setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names)))))
if (is.data.table(l)) setDT(ans)
else if (is.data.frame(l)) setDF(ans)
ans[]
}

Expand Down Expand Up @@ -34,3 +46,4 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
setattr(ans, 'names', names)
ans
}

16 changes: 15 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -6696,7 +6696,7 @@ test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed
test(1477.08, transpose(1:5), error="l must be a list")
test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be NULL or length=1 vector")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector")
test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE")
test(1477.13, transpose(list()), list())

Expand Down Expand Up @@ -15509,6 +15509,20 @@ test(2072.066, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(letters[1:6
test(2072.067, fifelse(c(TRUE, NA, TRUE, FALSE, FALSE, FALSE), factor(NA), factor(NA)),
factor(c(NA,NA,NA,NA,NA,NA)))

DT = data.table(x=1:5, y=6:10)
test(2073.1, transpose(DT, keep.names="rn"),
ans<-data.table(rn=c('x','y'), V1=c(1L, 6L), V2=c(2L, 7L), V3=c(3L, 8L), V4=c(4L, 9L), V5=c(5L, 10L)))
test(2073.2, transpose(DT, keep.names=TRUE), error="either NULL.*name of the first column of the result")
test(2073.3, transpose(ans, make.names="rn"), DT)
test(2073.4, transpose(ans, keep.names="rn", make.names="rn"), data.table(rn=paste0("V",1:5), x=1:5, y=6:10))
L = list(a=1:3, rn=LETTERS[1:3], b=4:6)
test(2073.5, transpose(L, make.names=0), error="make.names=0 is out of range [1,ncol=3]")
test(2073.6, transpose(L, make.names=4), error="make.names=4 is out of range [1,ncol=3]")
test(2073.7, transpose(L, make.names=NA), error="make.names=NA is out of range [1,ncol=3]")
test(2073.8, transpose(L, make.names=2), list(A=INT(1,4), B=INT(2,5), C=INT(3,6)))
test(2073.9, transpose(L, make.names=2, keep.names='foo'), list(foo=c("a","b"), A=INT(1,4), B=INT(2,5), C=INT(3,6)))


###################################
# Add new tests above this line #
###################################
Expand Down
5 changes: 3 additions & 2 deletions man/transpose.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
}

\usage{
transpose(l, fill=NA, ignore.empty=FALSE)
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL)
}
\arguments{
\item{l}{ A list, data.frame or data.table. }
\item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so as to return each element of the transposed result of equal lengths. }
\item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.}
\item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.}
\item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.}
}
\details{
The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result.
Expand All @@ -21,7 +23,6 @@ transpose(l, fill=NA, ignore.empty=FALSE)
This is particularly useful in tasks that require splitting a character column and assigning each part to a separate column. This operation is quite common enough that a function \code{\link{tstrsplit}} is exported.

\code{factor} columns are converted to \code{character} type. Attributes are not preserved at the moment. This may change in the future.

}
\value{
A transposed \code{list}, \code{data.frame} or \code{data.table}.
Expand Down
108 changes: 53 additions & 55 deletions src/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,99 +2,97 @@
#include <Rdefines.h>
#include <time.h>

SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {

int nprotect=0;
if (!isNewList(l))
error("l must be a list.");
if (!length(l))
return(duplicate(l));
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL)
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0]==NA_LOGICAL)
error("ignore.empty should be logical TRUE/FALSE.");
bool ignore = LOGICAL(ignoreArg)[0];
if (!(isNull(keepNamesArg) || (isString(keepNamesArg) && LENGTH(keepNamesArg)==1)))
error("keep.names should be either NULL, or the name of the first column of the result in which to place the names of the input");
bool rn = !isNull(keepNamesArg);
if (length(fill) != 1)
error("fill must be NULL or length=1 vector.");
error("fill must be a length 1 vector, such as the default NA");
R_len_t ln = LENGTH(l);
Rboolean ignore = LOGICAL(ignoreArg)[0];

// preprocessing
R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t));
int maxlen=0, zerolen=0;
SEXPTYPE maxtype=0;
for (int i=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
if (!isVectorAtomic(li) && !isNull(li))
error("Item %d of list input is not an atomic vector", i+1);
len[i] = length(li);
if (len[i] > maxlen)
maxlen = len[i];
zerolen += (len[i] == 0);
if (isFactor(li)) {
maxtype = STRSXP;
} else {
SEXPTYPE type = TYPEOF(li);
if (type > maxtype)
maxtype = type;
}
const int len = length(li);
if (len>maxlen) maxlen=len;
zerolen += (len==0);
SEXPTYPE type = TYPEOF(li);
if (isFactor(li)) type=STRSXP;
if (type>maxtype) maxtype=type;
}
// coerce fill to maxtype
fill = PROTECT(coerceVector(fill, maxtype));
fill = PROTECT(coerceVector(fill, maxtype)); nprotect++;

// allocate 'ans'
SEXP ans = PROTECT(allocVector(VECSXP, maxlen));
int anslen = (!ignore) ? ln : (ln - zerolen);
SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++;
int anslen = (ignore) ? (ln - zerolen) : ln;
if (rn) {
SEXP tt;
SET_VECTOR_ELT(ans, 0, tt=allocVector(STRSXP, anslen));
SEXP lNames = PROTECT(getAttrib(l, R_NamesSymbol)); nprotect++;
for (int i=0, j=0; i<ln; ++i) {
if (length(VECTOR_ELT(l, i))) SET_STRING_ELT(tt, j++, STRING_ELT(lNames, i));
}
}
for (int i=0; i<maxlen; ++i) {
SET_VECTOR_ELT(ans, i, allocVector(maxtype, anslen));
SET_VECTOR_ELT(ans, i+rn, allocVector(maxtype, anslen));
}

// transpose
int k=0;
for (int i=0; i<ln; ++i) {
if (ignore && !len[i]) continue;
const SEXP *ansp = VECTOR_PTR(ans);
for (int i=0, k=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
const int len = length(li);
if (ignore && len==0) continue;
bool coerce = false;
if (TYPEOF(li) != maxtype) {
coerce = true;
li = PROTECT(isFactor(li) ? asCharacterFactor(li) : coerceVector(li, maxtype));
coerce = true;
}
switch (maxtype) { // TODO remove more macros
case INTSXP : {
const int *ili = INTEGER(li);
const int *ifill = INTEGER(fill);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
INTEGER(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
}
}
break;
switch (maxtype) {
case LGLSXP : {
const int *ili = LOGICAL(li);
const int *ifill = LOGICAL(fill);
const int ifill = LOGICAL(fill)[0];
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
LOGICAL(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
LOGICAL(ansp[j+rn])[k] = j<len ? ili[j] : ifill;
}
}
break;
} break;
case INTSXP : {
const int *ili = INTEGER(li);
const int ifill = INTEGER(fill)[0];
for (int j=0; j<maxlen; ++j) {
INTEGER(ansp[j+rn])[k] = j<len ? ili[j] : ifill;
}
} break;
case REALSXP : {
const double *dli = REAL(li);
const double *dfill = REAL(fill);
const double dfill = REAL(fill)[0];
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
REAL(thisi)[k] = (j < len[i]) ? dli[j] : dfill[0];
REAL(ansp[j+rn])[k] = j<len ? dli[j] : dfill;
}
}
break;
case STRSXP :
} break;
case STRSXP : {
const SEXP sfill = STRING_ELT(fill, 0);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SET_STRING_ELT(thisi, k, (j < len[i]) ? STRING_ELT(li, j) : STRING_ELT(fill, 0));
SET_STRING_ELT(ansp[j+rn], k, j<len ? STRING_ELT(li, j) : sfill);
}
break;
} break;
default :
error("Unsupported column type '%s'", type2char(maxtype));
error("Unsupported column type '%s'", type2char(maxtype));
}
if (coerce) UNPROTECT(1);
k++;
}
UNPROTECT(2);
return(ans);
UNPROTECT(nprotect);
return ans;
}

0 comments on commit 277e14a

Please sign in to comment.