Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keep.rownames argument for transpose #3715

Merged
merged 5 commits into from
Aug 8, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@
# data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4)
mattdowle marked this conversation as resolved.
Show resolved Hide resolved
```

22. `transpose` gains `keep.rownames` argument which stores the input's names at the beginning of the output (the first list element or the first column), [#1886](https://github.com/Rdatatable/data.table/issues/1886). Thanks to @ghost for the request.

#### BUG FIXES

1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
4 changes: 2 additions & 2 deletions R/transpose.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
transpose = function(l, fill=NA, ignore.empty=FALSE) {
ans = .Call(Ctranspose, l, fill, ignore.empty)
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.rownames=FALSE) {
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.rownames)
if (is.data.table(l)) setDT(ans)
else if (is.data.frame(l)) {
if (is.null(names(ans)))
Expand Down
5 changes: 5 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15509,6 +15509,11 @@ test(2072.066, fifelse(c(TRUE, TRUE, TRUE, FALSE, NA, FALSE), factor(letters[1:6
test(2072.067, fifelse(c(TRUE, NA, TRUE, FALSE, FALSE, FALSE), factor(NA), factor(NA)),
factor(c(NA,NA,NA,NA,NA,NA)))

DT = data.table(x=1:5, y=6:10)
test(2073.1, transpose(DT, keep.rownames = TRUE),
data.table(c('x','y'), c(1L, 6L), c(2L, 7L), c(3L, 8L), c(4L, 9L), c(5L, 10L)))


###################################
# Add new tests above this line #
###################################
Expand Down
3 changes: 2 additions & 1 deletion man/transpose.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
}

\usage{
transpose(l, fill=NA, ignore.empty=FALSE)
transpose(l, fill=NA, ignore.empty=FALSE, keep.rownames=FALSE)
}
\arguments{
\item{l}{ A list, data.frame or data.table. }
\item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so as to return each element of the transposed result of equal lengths. }
\item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.}
\item{keep.rownames}{ \code{logical} scalar; if \code{TRUE}, the first column (or list element) of the output will contain the names of the input. }
}
\details{
The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result.
Expand Down
55 changes: 41 additions & 14 deletions src/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,28 @@
#include <Rdefines.h>
#include <time.h>

SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepRowNamesArg) {

if (!isNewList(l))
error("l must be a list.");
if (!length(l))
return(duplicate(l));
if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL)
error("ignore.empty should be logical TRUE/FALSE.");
if (!isLogical(keepRowNamesArg) || LOGICAL(keepRowNamesArg)[0] == NA_LOGICAL)
error("keep.rownames should be logical TRUE/FALSE.");
if (length(fill) != 1)
error("fill must be NULL or length=1 vector.");
R_len_t ln = LENGTH(l);
Rboolean ignore = LOGICAL(ignoreArg)[0];
Rboolean keepRowNames = LOGICAL(keepRowNamesArg)[0];

// preprocessing
R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t));
int maxlen=0, zerolen=0;
SEXPTYPE maxtype=0;
// stop incrementing maxtype as soon as STRSXP is found
bool hasStrType=false;
for (int i=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
if (!isVectorAtomic(li) && !isNull(li))
Expand All @@ -27,22 +32,44 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
if (len[i] > maxlen)
maxlen = len[i];
zerolen += (len[i] == 0);
if (isFactor(li)) {
maxtype = STRSXP;
} else {
SEXPTYPE type = TYPEOF(li);
if (type > maxtype)
maxtype = type;
if (!hasStrType) {
if (isFactor(li)) {
maxtype = STRSXP;
hasStrType = true;
} else {
SEXPTYPE type = TYPEOF(li);
if (type == STRSXP) {
maxtype = type;
hasStrType = true;
continue;
}
if (type > maxtype)
maxtype = type;
}
}
}
// coerce fill to maxtype
fill = PROTECT(coerceVector(fill, maxtype));

// allocate 'ans'
SEXP ans = PROTECT(allocVector(VECSXP, maxlen));
int anslen = (!ignore) ? ln : (ln - zerolen);
int dataStart= (int) keepRowNames;
SEXP ans = PROTECT(allocVector(VECSXP, keepRowNames ? maxlen+1 : maxlen));
int anslen = (ignore) ? (ln - zerolen) : ln;
if (keepRowNames) {
SEXP rowNames = PROTECT(allocVector(STRSXP, anslen));
if (anslen < ln) {
SEXP lNames = getAttrib(l, R_NamesSymbol);
for (int i=0, j=0; i < ln; i++) {
if (length(VECTOR_ELT(l, i)) != 0)
SET_STRING_ELT(rowNames, j++, STRING_ELT(lNames, i));
}
} else {
SET_VECTOR_ELT(ans, 0, getAttrib(l, R_NamesSymbol));
}
UNPROTECT(1);
}
for (int i=0; i<maxlen; ++i) {
SET_VECTOR_ELT(ans, i, allocVector(maxtype, anslen));
SET_VECTOR_ELT(ans, i+dataStart, allocVector(maxtype, anslen));
}

// transpose
Expand All @@ -60,7 +87,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
const int *ili = INTEGER(li);
const int *ifill = INTEGER(fill);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SEXP thisi = VECTOR_ELT(ans, j+dataStart);
INTEGER(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
}
}
Expand All @@ -69,7 +96,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
const int *ili = LOGICAL(li);
const int *ifill = LOGICAL(fill);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SEXP thisi = VECTOR_ELT(ans, j+dataStart);
LOGICAL(thisi)[k] = (j < len[i]) ? ili[j] : ifill[0];
}
}
Expand All @@ -78,14 +105,14 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) {
const double *dli = REAL(li);
const double *dfill = REAL(fill);
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SEXP thisi = VECTOR_ELT(ans, j+dataStart);
REAL(thisi)[k] = (j < len[i]) ? dli[j] : dfill[0];
}
}
break;
case STRSXP :
for (int j=0; j<maxlen; ++j) {
SEXP thisi = VECTOR_ELT(ans, j);
SEXP thisi = VECTOR_ELT(ans, j+dataStart);
SET_STRING_ELT(thisi, k, (j < len[i]) ? STRING_ELT(li, j) : STRING_ELT(fill, 0));
}
break;
Expand Down