Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keep.rownames argument for transpose #3715

Merged
merged 5 commits into from
Aug 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@
# data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4)
mattdowle marked this conversation as resolved.
Show resolved Hide resolved
```
22. `transpose` gains `keep.names=` and `make.names=` arguments, [#1886](https://github.com/Rdatatable/data.table/issues/1886). Previously, column names were dropped and there was no way to keep them. `keep.names="rn"` keeps the column names and puts them in the `"rn"` column of the result. Similarly, `make.names="rn"` uses column `"rn"` as the column names of the result. Both arguments are `NULL` by default for backwards compatibility. As these new arguments are new, they are subject to change in future according to community feedback. Thanks to @ghost for the request.
#### BUG FIXES
1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
29 changes: 21 additions & 8 deletions R/transpose.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
transpose = function(l, fill=NA, ignore.empty=FALSE) {
ans = .Call(Ctranspose, l, fill, ignore.empty)
if (is.data.table(l)) setDT(ans)
else if (is.data.frame(l)) {
if (is.null(names(ans)))
setattr(ans, "names", paste0("V", seq_along(ans)))
setattr(ans, "row.names", .set_row_names(length(ans[[1L]])))
setattr(ans, "class", "data.frame")
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) {
if (!is.null(make.names)) {
stopifnot(length(make.names)==1L)
if (is.character(make.names)) {
make.names=chmatch(make.names, names(l))
if (is.na(make.names))
stop("make.names='",make.names,"' not found in names of input")
} else {
make.names = as.integer(make.names)
if (is.na(make.names) || make.names<1L || make.names>length(l))
stop("make.names=",make.names," is out of range [1,ncol=",length(l),"]")
}
colnames = as.character(l[[make.names]])
l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names]
}
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names)
if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames))
else if (is.data.frame(l)) # including data.table but not plain list
setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names)))))
if (is.data.table(l)) setDT(ans)
else if (is.data.frame(l)) setDF(ans)
ans[]
}

Expand Down Expand Up @@ -34,3 +46,4 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
setattr(ans, 'names', names)
ans
}

16 changes: 15 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -6696,7 +6696,7 @@ test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed
test(1477.08, transpose(1:5), error="l must be a list")
test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be NULL or length=1 vector")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector")
test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE")
test(1477.13, transpose(list()), list())

Expand Down Expand Up @@ -15509,6 +15509,20 @@ test(2072.066, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(letters[1:6
test(2072.067, fifelse(c(TRUE, NA, TRUE, FALSE, FALSE, FALSE), factor(NA), factor(NA)),
factor(c(NA,NA,NA,NA,NA,NA)))

DT = data.table(x=1:5, y=6:10)
test(2073.1, transpose(DT, keep.names="rn"),
ans<-data.table(rn=c('x','y'), V1=c(1L, 6L), V2=c(2L, 7L), V3=c(3L, 8L), V4=c(4L, 9L), V5=c(5L, 10L)))
test(2073.2, transpose(DT, keep.names=TRUE), error="either NULL.*name of the first column of the result")
test(2073.3, transpose(ans, make.names="rn"), DT)
test(2073.4, transpose(ans, keep.names="rn", make.names="rn"), data.table(rn=paste0("V",1:5), x=1:5, y=6:10))
L = list(a=1:3, rn=LETTERS[1:3], b=4:6)
test(2073.5, transpose(L, make.names=0), error="make.names=0 is out of range [1,ncol=3]")
test(2073.6, transpose(L, make.names=4), error="make.names=4 is out of range [1,ncol=3]")
test(2073.7, transpose(L, make.names=NA), error="make.names=NA is out of range [1,ncol=3]")
test(2073.8, transpose(L, make.names=2), list(A=INT(1,4), B=INT(2,5), C=INT(3,6)))
test(2073.9, transpose(L, make.names=2, keep.names='foo'), list(foo=c("a","b"), A=INT(1,4), B=INT(2,5), C=INT(3,6)))


###################################
# Add new tests above this line #
###################################
Expand Down
5 changes: 3 additions & 2 deletions man/transpose.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
}

\usage{
transpose(l, fill=NA, ignore.empty=FALSE)
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL)
}
\arguments{
\item{l}{ A list, data.frame or data.table. }
\item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so as to return each element of the transposed result of equal lengths. }
\item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.}
\item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.}
\item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.}
}
\details{
The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result.
Expand All @@ -21,7 +23,6 @@ transpose(l, fill=NA, ignore.empty=FALSE)
This is particularly useful in tasks that require splitting a character column and assigning each part to a separate column. This operation is quite common enough that a function \code{\link{tstrsplit}} is exported.

\code{factor} columns are converted to \code{character} type. Attributes are not preserved at the moment. This may change in the future.

}
\value{
A transposed \code{list}, \code{data.frame} or \code{data.table}.
Expand Down
108 changes: 53 additions & 55 deletions src/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,99 +2,97 @@
#include <Rdefines.h>
#include <time.h>

SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {

int nprotect=0;
if (!isNewList(l))
error("l must be a list.");
if (!length(l))
return(duplicate(l));
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL)
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0]==NA_LOGICAL)
error("ignore.empty should be logical TRUE/FALSE.");
bool ignore = LOGICAL(ignoreArg)[0];
if (!(isNull(keepNamesArg) || (isString(keepNamesArg) && LENGTH(keepNamesArg)==1)))
error("keep.names should be either NULL, or the name of the first column of the result in which to place the names of the input");
bool rn = !isNull(keepNamesArg);
if (length(fill) != 1)
error("fill must be NULL or length=1 vector.");
error("fill must be a length 1 vector, such as the default NA");
R_len_t ln = LENGTH(l);
Rboolean ignore = LOGICAL(ignoreArg)[0];

// preprocessing
R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t));
int maxlen=0, zerolen=0;
SEXPTYPE maxtype=0;
for (int i=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
if (!isVectorAtomic(li) && !isNull(li))
error("Item %d of list input is not an atomic vector", i+1);
len[i] = length(li);
if (len[i] > maxlen)
maxlen = len[i];
zerolen += (len[i] == 0);
if (isFactor(li)) {
maxtype = STRSXP;
} else {
SEXPTYPE type = TYPEOF(li);
if (type > maxtype)
maxtype = type;
}
const int len = length(li);
if (len>maxlen) maxlen=len;
zerolen += (len==0);
SEXPTYPE type = TYPEOF(li);
if (isFactor(li)) type=STRSXP;
if (type>maxtype) maxtype=type;
}
// coerce fill to maxtype
fill = PROTECT(coerceVector(fill, maxtype));
fill = PROTECT(coerceVector(fill, maxtype)); nprotect++;

// allocate 'ans'
SEXP ans = PROTECT(allocVector(VECSXP, maxlen));
int anslen = (!ignore) ? ln : (ln - zerolen);
SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++;
int anslen = (ignore) ? (ln - zerolen) : ln;
if (rn) {
SEXP tt;
SET_VECTOR_ELT(ans, 0, tt=allocVector(STRSXP, anslen));
SEXP lNames = PROTECT(getAttrib(l, R_NamesSymbol)); nprotect++;
for (int i=0, j=0; i<ln; ++i) {
if (length(VECTOR_ELT(l, i))) SET_STRING_ELT(tt, j++, STRING_ELT(lNames, i));
}
}
for (int i=0; i<maxlen; ++i) {
SET_VECTOR_ELT(ans, i, allocVector(maxtype, anslen));
SET_VECTOR_ELT(ans, i+rn, allocVector(maxtype, anslen));
}

// transpose
int k=0;
for (int i=0; i<ln; ++i) {
if (ignore && !len[i]) continue;
const SEXP *ansp = VECTOR_PTR(ans);
for (int i=0, k=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
const int len = length(li);
if (ignore && len==0) continue;
bool coerce = false;
if (TYPEOF(li) != maxtype) {
coerce = true;
li = PROTECT(isFactor(li) ? asCharacterFactor(li) : coerceVector(li, maxtype));
coerce = true;
}
switch (maxtype) { // TODO remove more macros
case INTSXP : {
const int *ili = INTEGER(li);
const int *ifill = INTEGER(fill);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
INTEGER(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
}
}
break;
switch (maxtype) {
case LGLSXP : {
const int *ili = LOGICAL(li);
const int *ifill = LOGICAL(fill);
const int ifill = LOGICAL(fill)[0];
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
LOGICAL(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
LOGICAL(ansp[j+rn])[k] = j<len ? ili[j] : ifill;
}
}
break;
} break;
case INTSXP : {
const int *ili = INTEGER(li);
const int ifill = INTEGER(fill)[0];
for (int j=0; j<maxlen; ++j) {
INTEGER(ansp[j+rn])[k] = j<len ? ili[j] : ifill;
}
} break;
case REALSXP : {
const double *dli = REAL(li);
const double *dfill = REAL(fill);
const double dfill = REAL(fill)[0];
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
REAL(thisi)[k] = (j < len[i]) ? dli[j] : dfill[0];
REAL(ansp[j+rn])[k] = j<len ? dli[j] : dfill;
}
}
break;
case STRSXP :
} break;
case STRSXP : {
const SEXP sfill = STRING_ELT(fill, 0);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SET_STRING_ELT(thisi, k, (j < len[i]) ? STRING_ELT(li, j) : STRING_ELT(fill, 0));
SET_STRING_ELT(ansp[j+rn], k, j<len ? STRING_ELT(li, j) : sfill);
}
break;
} break;
default :
error("Unsupported column type '%s'", type2char(maxtype));
error("Unsupported column type '%s'", type2char(maxtype));
}
if (coerce) UNPROTECT(1);
k++;
}
UNPROTECT(2);
return(ans);
UNPROTECT(nprotect);
return ans;
}