From 9e90b7ad4b263e5c2b04a5887989c1bd4f338a58 Mon Sep 17 00:00:00 2001 From: innesbre Date: Sat, 2 Mar 2019 13:41:01 -0500 Subject: [PATCH] v1.1.0 Plotting functions now exported. --- NAMESPACE | 17 + R/deTest.R | 344 ++++++++++++++----- R/helperFx.R | 10 +- R/runViz.R | 164 ++++++--- R/shinyModules.R | 548 +++++++++++++++++++++++++++---- man/CalcAllSCV.Rd | 7 +- man/CalcCGS.Rd | 7 +- man/CalcDEcombn.Rd | 7 +- man/CalcDEvsRest.Rd | 7 +- man/CalcSCV.Rd | 5 +- man/dotplotDEgenes.Rd | 32 ++ man/fx_calcCGS_BP.Rd | 34 ++ man/fx_calcDEcombn_BP.Rd | 37 +++ man/fx_calcDEvsRest_BP.Rd | 35 ++ man/fx_calcESvsRest_BP.Rd | 42 +++ man/labelCellTypes.Rd | 2 +- man/plot_GEboxplot.Rd | 36 ++ man/plot_clustSep.Rd | 42 +++ man/plot_clusterGenes_DEgenes.Rd | 43 +++ man/plot_clusterGenes_markers.Rd | 65 ++++ man/plot_clusterGenes_search.Rd | 32 ++ man/plot_compareClusts.Rd | 54 +++ man/plot_deDotplot.Rd | 37 +++ man/plot_mdCompare.Rd | 43 +++ man/plot_mdPerClust.Rd | 36 ++ man/plot_sil.Rd | 15 + man/plot_tsne.Rd | 68 ++++ man/runShiny.Rd | 9 +- man/tsne_labels.Rd | 24 ++ 29 files changed, 1594 insertions(+), 208 deletions(-) create mode 100644 man/dotplotDEgenes.Rd create mode 100644 man/fx_calcCGS_BP.Rd create mode 100644 man/fx_calcDEcombn_BP.Rd create mode 100644 man/fx_calcDEvsRest_BP.Rd create mode 100644 man/fx_calcESvsRest_BP.Rd create mode 100644 man/plot_GEboxplot.Rd create mode 100644 man/plot_clustSep.Rd create mode 100644 man/plot_clusterGenes_DEgenes.Rd create mode 100644 man/plot_clusterGenes_markers.Rd create mode 100644 man/plot_clusterGenes_search.Rd create mode 100644 man/plot_compareClusts.Rd create mode 100644 man/plot_deDotplot.Rd create mode 100644 man/plot_mdCompare.Rd create mode 100644 man/plot_mdPerClust.Rd create mode 100644 man/plot_sil.Rd create mode 100644 man/plot_tsne.Rd create mode 100644 man/tsne_labels.Rd diff --git a/NAMESPACE b/NAMESPACE index fdaa21c..13bb742 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,13 +10,30 @@ export(DEdist) export(DEdistNN) 
export(DEmarker) export(DEneighb) +export(addCellMarkersToCGS) +export(dotplotDEgenes) +export(findKeyType) export(getEmb) export(getExpr) export(getMD) +export(labelCellTypes) +export(map2symbol) export(meanLogX) +export(plot_GEboxplot) +export(plot_clustSep) +export(plot_clusterGenes_DEgenes) +export(plot_clusterGenes_markers) +export(plot_clusterGenes_search) +export(plot_compareClusts) +export(plot_deDotplot) +export(plot_mdCompare) +export(plot_mdPerClust) +export(plot_sil) +export(plot_tsne) export(rainbow2) export(runShiny) export(spreadLabels2) +export(tsne_labels) exportClasses(sCVdata) exportClasses(sCVparams) exportMethods("ClustGeneStats<-") diff --git a/R/deTest.R b/R/deTest.R index 7a344e9..eb999b2 100644 --- a/R/deTest.R +++ b/R/deTest.R @@ -96,6 +96,8 @@ NULL #' method of your choice. This can be passed into your \code{sCVdata} objects #' in the list returned by \code{CalcAllSCV} using the function #' \code{\link{calcDEcombn}}. See function documentation for details. +#' @param UseBiocParallel Default = FALSE. Very experimental implementation of +#' BiocParallel for calculations. Not recommended. #' #' @return The function returns a list containing \code{\link{sCVdata}} objects #' for each cluster resolution (sample) in the \code{clusterDF} data frame. @@ -111,7 +113,7 @@ NULL #' names(getMD(your_scRNAseq_data_object))) #' # ^ Finds the cluster columns of the metadata in a Seurat object. 
#' -#' your_cluster_results <- getMD(your_scRNAseq_data_object)[,your_cluster_columns] +#' your_cluster_results <- getMD(your_scRNAseq_data_object)[your_cluster_columns] #' #' sCVdata_list <- CalcAllSCV(inD=your_scRNAseq_data_object, #' clusterDF=your_cluster_results, @@ -152,7 +154,8 @@ CalcAllSCV <- function(inD, storeAllDE=T, calcSil=T, calcDEvsRest=T, - calcDEcombn=T) { + calcDEcombn=T, + UseBiocParallel=F) { if (!is(inD)[1] %in% findMethodSignatures(getExpr)) { stop(paste( paste0("Input data object must be one of: ", @@ -183,13 +186,13 @@ CalcAllSCV <- function(inD, # If testAll == F, cluster solutions are sorted in ascending order of number # of clusters found. if (!testAll) { - warning(paste(" Testing cluster solutions in ascending order of number of clusters found.", + message(paste(" Testing cluster solutions in ascending order of number of clusters found.", "Testing will stop after finding a solution with 0 differentially expressed", "genes between nearest neighbouring clusters, and the resulting list of", "sCVdata objects will be in ascending order of number of clusters found.", sep="\n ")) sortedClusts <- order(sapply(clusterDF,function(X) length(unique(X)))) - clusterDF <- clusterDF[,sortedClusts] + clusterDF <- clusterDF[sortedClusts] } # This loop iterates through every cluster solution, and does DE testing @@ -216,9 +219,10 @@ CalcAllSCV <- function(inD, storeAllDE=storeAllDE, calcSil=calcSil, calcDEvsRest=calcDEvsRest, - calcDEcombn=calcDEcombn) + calcDEcombn=calcDEcombn, + UseBiocParallel=UseBiocParallel) if (!testAll) { - if (min(sapply(DEneighb(outList[[X]],FDRthresh),length)) < 1) { break } + if (min(sapply(DEneighb(outList[[X]],FDRthresh),nrow)) < 1) { break } } } @@ -306,6 +310,8 @@ CalcAllSCV <- function(inD, #' method of your choice. This can be passed into your \code{sCVdata} objects #' in the list returned by \code{CalcAllSCV} using the function #' \code{\link{calcDEcombn}}. See function documentation for details. 
+#' @param UseBiocParallel Default = FALSE. Very experimental implementation of +#' BiocParallel for calculations. Not recommended. #' #' @return The function returns an \code{\link{sCVdata}} object with all slots #' populated by default, and at least the \code{Clusters}, @@ -372,7 +378,8 @@ CalcSCV <- function(inD, storeAllDE=T, calcSil=T, calcDEvsRest=T, - calcDEcombn=T) { + calcDEcombn=T, + UseBiocParallel=F) { if (!is(inD)[1] %in% findMethodSignatures(getExpr)) { stop(paste( paste0("Input data object must be one of: ", @@ -418,14 +425,15 @@ CalcSCV <- function(inD, } } - ClustGeneStats(out) <- CalcCGS(out,inD) #this is not optional, since everything depends on it. + #this is not optional, since everything depends on it. + ClustGeneStats(out) <- CalcCGS(out,inD,UseBiocParallel) if (calcDEvsRest) { - DEvsRest(out) <- CalcDEvsRest(out,inD,storeAllDE) + DEvsRest(out) <- CalcDEvsRest(out,inD,storeAllDE,UseBiocParallel) } if (calcDEcombn) { - DEcombn(out) <- CalcDEcombn(out,inD,storeAllDE) + DEcombn(out) <- CalcDEcombn(out,inD,storeAllDE,UseBiocParallel) } return(out) } @@ -457,23 +465,10 @@ CalcSCV <- function(inD, fx_calcCGS <- function(nge,cl,exponent,pseudocount) { message("-- Calculating gene detection rate per cluster --") - # DR <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), - # function(X) apply(X,1,function(Y) sum(Y>0)/length(Y))) - # names(DR) <- levels(cl) DR <- pbapply::pbsapply(sapply(levels(cl),function(i) nge[,cl %in% i,drop=F],simplify=F), function(X) apply(X,1,function(Y) sum(Y > 0)/length(Y)),simplify=F) message("-- Calculating mean detected gene expression per cluster --") - # MDGE <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), - # function(X) apply(X,1,function(Y) { - # temp <- meanLogX(Y[Y>0], - # ncell=ncol(nge), - # ex=exponent, - # pc=pseudocount) - # if (is.na(temp)) { temp <- 0 } - # return(temp) - # })) - # names(MDGE) <- levels(cl) MDGE <- 
pbapply::pbsapply(sapply(levels(cl),function(i) nge[,cl %in% i,drop=F],simplify=F), function(X) apply(X,1,function(Y) { temp <- meanLogX(Y[Y > 0], @@ -485,13 +480,6 @@ fx_calcCGS <- function(nge,cl,exponent,pseudocount) { }),simplify=F) message("-- Calculating mean gene expression per cluster --") - # MGE <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), - # function(X) apply(X,1,function(Y) - # meanLogX(Y, - # ncell=ncol(nge), - # ex=exponent, - # pc=pseudocount))) - # names(MGE) <- levels(cl) MGE <- pbapply::pbsapply(sapply(levels(cl),function(i) nge[,cl %in% i,drop=F],simplify=F), function(X) apply(X,1,function(Y) meanLogX(Y, @@ -503,6 +491,59 @@ fx_calcCGS <- function(nge,cl,exponent,pseudocount) { data.frame(DR=DR[[X]],MDGE=MDGE[[X]],MGE=MGE[[X]]),simplify=F)) } +#' Internal fx for cluster-wise gene statistics using BiocParallel +#' +#' Internal function. See \code{\link{CalcCGS}}. +#' +#' @param nge The log-normalized gene expression matrix. +#' @param cl The factor with cluster assignments per cell (column of nge). +#' @param exponent The log base of your normalized input data. Seurat +#' normalization uses the natural log (set this to exp(1)), while other +#' normalization methods generally use log2 (set this to 2). +#' @param pseudocount The pseudocount added to all log-normalized values in your +#' input data. Most methods use a pseudocount of 1 to eliminate log(0) errors. +#' +#' @return The function returns a list of dataframes. Each list element contains +#' a named list of clusters at that resolution. Each of those list elements +#' contains a dataframe of three variables, where each sample is a gene. +#' \code{DR} is the proportion of cells in the cluster in which that gene was +#' detected. \code{MDGE} is mean normalized gene expression for that gene in +#' only the cells in which it was detected (see \code{\link{meanLogX}} for +#' mean calculation). 
\code{MGE} is the mean normalized gene expression for +#' that gene in all cells of the cluster (see \code{\link{meanLogX}} for mean +#' calculation). + +fx_calcCGS_BP <- function(nge,cl,exponent,pseudocount) { + message("-- Calculating gene detection rate per cluster --") + DR <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), + function(X) apply(X,1,function(Y) sum(Y>0)/length(Y))) + names(DR) <- levels(cl) + + message("-- Calculating mean detected gene expression per cluster --") + MDGE <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), + function(X) apply(X,1,function(Y) { + temp <- meanLogX(Y[Y>0], + ncell=ncol(nge), + ex=exponent, + pc=pseudocount) + if (is.na(temp)) { temp <- 0 } + return(temp) + })) + names(MDGE) <- levels(cl) + + message("-- Calculating mean gene expression per cluster --") + MGE <- BiocParallel::bplapply(sapply(levels(cl),function(i) nge[,cl %in% i],simplify=F), + function(X) apply(X,1,function(Y) + meanLogX(Y, + ncell=ncol(nge), + ex=exponent, + pc=pseudocount))) + names(MGE) <- levels(cl) + + return(sapply(levels(cl),function(X) + data.frame(DR=DR[[X]],MDGE=MDGE[[X]],MGE=MGE[[X]]),simplify=F)) +} + #' Calculate cluster-wise gene statistics for sCVdata #' @@ -521,6 +562,8 @@ fx_calcCGS <- function(nge,cl,exponent,pseudocount) { #' classes are not currently supported. #' \href{https://github.com/BaderLab/scClustViz/issues}{Please submit requests #' for other data objects here!} +#' @param UseBiocParallel Default = FALSE. Very experimental implementation of +#' BiocParallel for calculations. Not recommended. #' #' @return The function returns a list of dataframes. Each list element contains #' a named list of clusters at that resolution. 
Each of those list elements @@ -547,18 +590,25 @@ fx_calcCGS <- function(nge,cl,exponent,pseudocount) { #' @export #' -setGeneric("CalcCGS",function(sCVd,inD) standardGeneric("CalcCGS")) +setGeneric("CalcCGS",function(sCVd,inD,UseBiocParallel) standardGeneric("CalcCGS")) #' @describeIn CalcCGS Calculate cluster-wise gene stats for sCVdata #' @export setMethod("CalcCGS",signature("sCVdata"), - function(sCVd,inD) { - fx_calcCGS(nge=getExpr(inD,Param(sCVd,"assayType")), - cl=Clusters(sCVd), - exponent=Param(sCVd,"exponent"), - pseudocount=Param(sCVd,"pseudocount")) + function(sCVd,inD,UseBiocParallel) { + if (UseBiocParallel) { + fx_calcCGS_BP(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + exponent=Param(sCVd,"exponent"), + pseudocount=Param(sCVd,"pseudocount")) + } else { + fx_calcCGS(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + exponent=Param(sCVd,"exponent"), + pseudocount=Param(sCVd,"pseudocount")) + } }) @@ -594,22 +644,6 @@ setMethod("CalcCGS",signature("sCVdata"), fx_calcESvsRest <- function(nge,cl,CGS,exponent,pseudocount,DRthresh) { message("-- Calculating differential expression cluster vs rest effect size --") - # temp <- BiocParallel::bplapply(levels(cl),function(i) { - # temp <- data.frame(overThreshold = CGS[[i]]$DR >= DRthresh, - # logGER=NA, - # pVal=NA, - # FDR=NA) - # rownames(temp) <- rownames(CGS[[i]]) - # temp[temp$overThreshold,"logGER"] <- CGS[[i]][temp$overThreshold,"MGE"] - - # apply(nge[temp$overThreshold,(!cl %in% i | is.na(cl))],1,function(Y) - # meanLogX(Y,ncell=ncol(nge),ex=exponent,pc=pseudocount)) - # # temp$overThreshold <- temp$logGER > 0 - # # temp[is.na(temp$overThreshold),"overThreshold"] <- F - # return(temp) - # }) - # names(temp) <- levels(cl) - # return(temp) - return(pbapply::pbsapply(levels(cl),function(i) { temp <- data.frame(overThreshold = CGS[[i]]$DR >= DRthresh, logGER=NA, @@ -620,13 +654,58 @@ fx_calcESvsRest <- function(nge,cl,CGS,exponent,pseudocount,DRthresh) { 
temp[temp$overThreshold,"logGER"] <- CGS[[i]][temp$overThreshold,"MGE"] - apply(nge[temp$overThreshold,(!cl %in% i | is.na(cl))],1,function(Y) meanLogX(Y,ncell=ncol(nge),ex=exponent,pc=pseudocount)) - # temp$overThreshold <- temp$logGER > 0 - # temp[is.na(temp$overThreshold),"overThreshold"] <- F return(temp) },simplify=F)) } +#' Internal fx to calculate logGER for DEvsRest calculation using BiocParallel +#' +#' Internal function. See \code{\link{CalcDEvsRest}}. +#' +#' Calculates the log-ratios of gene expression for all genes in each one-vs-all +#' comparison of a cluster vs the rest of the data. This is used to determine +#' the genes used in DEvsRest calculations. +#' +#' @param nge The log-normalized gene expression matrix. +#' @param cl The factor with cluster assignments per cell (column of nge). +#' @param CGS The output from \code{\link{CalcCGS}}. +#' @param exponent The log base of your normalized input data. Seurat +#' normalization uses the natural log (set this to exp(1)), while other +#' normalization methods generally use log2 (set this to 2). +#' @param pseudocount The pseudocount added to all log-normalized values in your +#' input data. Most methods use a pseudocount of 1 to eliminate log(0) errors. +#' @param DRthresh The threshold for minimum detection rate of a gene in the +#' cluster for the gene to be considered in the following Wilcoxon rank-sum +#' test. +#' +#' @return The function returns a list where each list element is the log-ratios +#' of gene expression when comparing each gene in a cluster to the rest of the +#' cells as a whole in a one vs all comparison. These logGER tables are +#' filtered to only include those gene that pass logGER threshold, and thus +#' the names for each list entry correspond to the genes to test in +#' \code{\link{fx_calcDEvsRest}}. 
+#' + +fx_calcESvsRest_BP <- function(nge,cl,CGS,exponent,pseudocount,DRthresh) { + message("-- Calculating differential expression cluster vs rest effect size --") + temp <- BiocParallel::bplapply(levels(cl),function(i) { + temp <- data.frame(overThreshold = CGS[[i]]$DR >= DRthresh, + logGER=NA, + Wstat=NA, + pVal=NA, + FDR=NA) + rownames(temp) <- rownames(CGS[[i]]) + temp[temp$overThreshold,"logGER"] <- CGS[[i]][temp$overThreshold,"MGE"] - + apply(nge[temp$overThreshold,(!cl %in% i | is.na(cl))],1,function(Y) + meanLogX(Y,ncell=ncol(nge),ex=exponent,pc=pseudocount)) + return(temp) + }) + names(temp) <- levels(cl) + return(temp) +} + + #' Internal fx to perform one vs all DE testing #' #' Internal function. See \code{\link{CalcDEvsRest}}. @@ -653,13 +732,6 @@ fx_calcESvsRest <- function(nge,cl,CGS,exponent,pseudocount,DRthresh) { fx_calcDEvsRest <- function(nge,cl,deTes) { message("-- Testing differential expression cluster vs rest --") - # deT_pVal <- BiocParallel::bplapply(levels(cl),function(i) - # apply(nge[rownames(deTes[[i]])[deTes[[i]]$overThreshold],],1, #slice by rowname is slower, but safer - # function(X) - # # suppressWarnings(wilcox.test(X[cl %in% i],X[!cl %in% i],alternative="greater")$p.value) - # suppressWarnings(wilcox.test(X[cl %in% i],X[!cl %in% i])$p.value) - # )) - # names(deT_pVal) <- levels(cl) deT_pVal <- pbapply::pbsapply(levels(cl),function(i) apply(nge[rownames(deTes[[i]])[deTes[[i]]$overThreshold],],1,function(X) # ^ slice by rowname is a little slower, but safer @@ -675,6 +747,47 @@ fx_calcDEvsRest <- function(nge,cl,deTes) { } +#' Internal fx to perform one vs all DE testing using BiocParallel +#' +#' Internal function. See \code{\link{CalcDEvsRest}}. +#' +#' Calculates Wilcoxon rank-sum tests for all genes in each one-vs-all +#' comparison of a cluster vs the rest of the data. You probably don't need to +#' use this unless you're trying to customize \code{\link{clusterWiseDEtest}}. 
+#' +#' @param nge The log-normalized gene expression matrix. +#' @param cl The factor with cluster assignments per cell (column of nge). +#' @param deTes The output from \code{\link{fx_calcESvsRest}}. +#' +#' @return Differential testing results from Wilcoxon rank sum tests comparing a +#' gene in each cluster to the rest of the cells as a whole in a one vs all +#' comparison. The results are stored as a named list of dataframes. There is +#' a list element for each cluster containing a dataframe of three variables, +#' where each sample is a gene. \code{logGER} is the log gene expression ratio +#' calculated by subtracting the mean expression of the gene (see +#' \link{meanLogX} for mean calculation) in all other cells from the mean +#' expression of the gene in this cluster. \code{Wstat} and \code{pVal} are +#' the test statistic and the p-value of the Wilcoxon rank sum test. +#' \code{FDR} is the false discovery rate-corrected p-value of the test. +#' + +fx_calcDEvsRest_BP <- function(nge,cl,deTes) { + message("-- Testing differential expression cluster vs rest --") + deT_pVal <- BiocParallel::bplapply(levels(cl),function(i) + apply(nge[rownames(deTes[[i]])[deTes[[i]]$overThreshold],],1,function(X) + # suppressWarnings(wilcox.test(X[cl %in% i],X[!cl %in% i],alternative="greater")$p.value) + suppressWarnings(unlist(wilcox.test(X[cl %in% i],X[!cl %in% i])[c("statistic","p.value")])) + )) + names(deT_pVal) <- levels(cl) + for (i in names(deTes)) { + deTes[[i]][colnames(deT_pVal[[i]]),"Wstat"] <- deT_pVal[[i]]["statistic.W",] + deTes[[i]][colnames(deT_pVal[[i]]),"pVal"] <- deT_pVal[[i]]["p.value",] + deTes[[i]][colnames(deT_pVal[[i]]),"FDR"] <- p.adjust(deT_pVal[[i]]["p.value",],"fdr") + } + return(deTes) +} + + #' Calculates one vs. 
all DE tests for sCVdata #' #' Performs differential gene expression tests for each cluster in an sCVdata @@ -708,6 +821,8 @@ fx_calcDEvsRest <- function(nge,cl,deTes) { #' (TRUE), or just those passing the detection rate threshold for the Wilcoxon #' rank-sum test (FALSE). Setting this to FALSE will reduce the size of the #' output sCVdata object. +#' @param UseBiocParallel Default = FALSE. Very experimental implementation of +#' BiocParallel for calculations. Not recommended. #' #' @return A named list of data frames, one entry for each level in #' \code{Clusters(sCVd)} (with corresponding name).Each entry is data frame @@ -756,31 +871,47 @@ fx_calcDEvsRest <- function(nge,cl,deTes) { #' @export #' -setGeneric("CalcDEvsRest",function(sCVd,inD,storeAllDE) standardGeneric("CalcDEvsRest")) +setGeneric("CalcDEvsRest",function(sCVd,inD,storeAllDE,UseBiocParallel) + standardGeneric("CalcDEvsRest")) #' @describeIn CalcDEvsRest Calculate one vs. all DE tests for sCVdata #' @export setMethod("CalcDEvsRest","sCVdata", - function(sCVd,inD,storeAllDE) { + function(sCVd,inD,storeAllDE,UseBiocParallel) { if (!is(inD)[1] %in% findMethodSignatures(getExpr)) { stop(paste("The input data object must be one of:", paste(findMethodSignatures(getExpr),collapse=", "), sep="\n ")) } - deTes <- fx_calcESvsRest(nge=getExpr(inD,Param(sCVd,"assayType")), - cl=Clusters(sCVd), - CGS=ClustGeneStats(sCVd), - exponent=Param(sCVd,"exponent"), - pseudocount=Param(sCVd,"pseudocount"), - DRthresh=Param(sCVd,"DRthresh")) + if (UseBiocParallel) { + deTes <- fx_calcESvsRest_BP(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + CGS=ClustGeneStats(sCVd), + exponent=Param(sCVd,"exponent"), + pseudocount=Param(sCVd,"pseudocount"), + DRthresh=Param(sCVd,"DRthresh")) + } else { + deTes <- fx_calcESvsRest(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + CGS=ClustGeneStats(sCVd), + exponent=Param(sCVd,"exponent"), + pseudocount=Param(sCVd,"pseudocount"), + 
DRthresh=Param(sCVd,"DRthresh")) + } if (!storeAllDE) { deTes <- sapply(deTes,function(X) X[X$overThreshold,],simplify=F) } - deTes <- fx_calcDEvsRest(nge=getExpr(inD,Param(sCVd,"assayType")), - cl=Clusters(sCVd), - deTes=deTes) + if (UseBiocParallel) { + deTes <- fx_calcDEvsRest_BP(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + deTes=deTes) + } else { + deTes <- fx_calcDEvsRest(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + deTes=deTes) + } return(deTes) }) @@ -852,10 +983,6 @@ fx_calcEScombn <- function(cl,CGS,DRthresh) { fx_calcDEcombn <- function(nge,cl,deMes) { combosL <- strsplit(names(deMes),"-") message("-- Testing differential expression between clusters --") - # deM_pVal <- BiocParallel::bplapply(seq_along(combosL),function(i) - # apply(nge[rownames(deMes[[i]])[deMes[[i]]$overThreshold],],1,function(X) - # suppressWarnings(wilcox.test(X[cl == combosL[[i]][1]], - # X[cl == combosL[[i]][2]])$p.value))) deM_pVal <- pbapply::pbsapply(seq_along(combosL),function(i) apply(nge[rownames(deMes[[i]])[deMes[[i]]$overThreshold],],1,function(X) suppressWarnings(unlist( @@ -871,6 +998,52 @@ fx_calcDEcombn <- function(nge,cl,deMes) { } +#' Internal fx to calculate DE between combinations of clusters using BiocParallel +#' +#' Internal function. See \code{\link{CalcDEcombn}}. +#' +#' Calculates Wilcoxon rank-sum tests for all genes in each of the potential +#' combinations of clusters to compare. +#' +#' @param nge The log-normalized gene expression matrix. +#' @param cl The factor with cluster assignments per cell (column of nge). +#' @param deMes The output from \code{\link{fx_calcEScombn}}. +#' +#' @return Differential testing results from Wilcoxon rank sum tests comparing a +#' gene in each cluster to that gene in every other cluster in a series of +#' tests. The results are stored as a nested list of dataframes. Each list +#' element contains a named list of clusters (cluster A). 
Each of those lists +#' contains a named list of all the other clusters (cluster B). Each of those +#' list elements contains a dataframe of four variables, where each sample is +#' a gene. \code{dDR} is the difference in detection rate of that gene between +#' the two clusters (DR[A] - DR[B]). \code{logGER} is the log gene expression +#' ratio calculated by taking the difference in mean expression of the gene +#' (see \code{\link{meanLogX}} for mean calculation) between the two clusters +#' (MGE[A] - MGE[B]). \code{Wstat} and \code{pVal} are the test statistic and +#' the p-value of the Wilcoxon rank sum test. \code{FDR} is the false +#' discovery rate-corrected p-value of the test. +#' + +fx_calcDEcombn_BP <- function(nge,cl,deMes) { + combosL <- strsplit(names(deMes),"-") + message("-- Testing differential expression between clusters --") + deM_pVal <- BiocParallel::bplapply(seq_along(combosL),function(i) + apply(nge[rownames(deMes[[i]])[deMes[[i]]$overThreshold],],1,function(X) + suppressWarnings(unlist( + wilcox.test(X[cl == combosL[[i]][1]], + X[cl == combosL[[i]][2]])[c("statistic","p.value")] + )) + ) + ) + for (i in seq_along(deMes)) { + deMes[[i]][colnames(deM_pVal[[i]]),"Wstat"] <- deM_pVal[[i]]["statistic.W",] + deMes[[i]][colnames(deM_pVal[[i]]),"pVal"] <- deM_pVal[[i]]["p.value",] + deMes[[i]][colnames(deM_pVal[[i]]),"FDR"] <- p.adjust(deM_pVal[[i]]["p.value",],"fdr") + } + return(deMes) +} + + #' Performs DE testing between pairs of clusters in sCVdata #' #' Performs differential gene expression tests between each pairwise combination @@ -906,6 +1079,8 @@ fx_calcDEcombn <- function(nge,cl,deMes) { #' (TRUE), or just those passing the detection rate threshold for the Wilcoxon #' rank-sum test (FALSE). Setting this to FALSE will reduce the size of the #' output sCVdata object. +#' @param UseBiocParallel Default = FALSE. Very experimental implementation of +#' BiocParallel for calculations. Not recommended. 
#' #' @return A named list of data frames, one entry for each pairwise combination #' of levels in \code{Clusters(sCVd)} (with corresponding name where levels @@ -972,14 +1147,15 @@ fx_calcDEcombn <- function(nge,cl,deMes) { #' @export #' -setGeneric("CalcDEcombn",function(sCVd,inD,storeAllDE) standardGeneric("CalcDEcombn")) +setGeneric("CalcDEcombn",function(sCVd,inD,storeAllDE,UseBiocParallel) + standardGeneric("CalcDEcombn")) #' @describeIn CalcDEcombn Calculate DE between cluster pairs #' @export setMethod("CalcDEcombn","sCVdata", - function(sCVd,inD,storeAllDE) { + function(sCVd,inD,storeAllDE,UseBiocParallel) { if (!is(inD)[1] %in% findMethodSignatures(getExpr)) { stop(paste("The input data object must be one of:", paste(findMethodSignatures(getExpr),collapse=", "), @@ -991,9 +1167,15 @@ setMethod("CalcDEcombn","sCVdata", if (!storeAllDE) { deMes <- sapply(deMes,function(X) X[X$overThreshold,],simplify=F) } - deMes <- fx_calcDEcombn(nge=getExpr(inD,Param(sCVd,"assayType")), - cl=Clusters(sCVd), - deMes=deMes) + if (UseBiocParallel) { + deMes <- fx_calcDEcombn_BP(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + deMes=deMes) + } else { + deMes <- fx_calcDEcombn(nge=getExpr(inD,Param(sCVd,"assayType")), + cl=Clusters(sCVd), + deMes=deMes) + } return(deMes) }) diff --git a/R/helperFx.R b/R/helperFx.R index 98af46c..84d2d93 100644 --- a/R/helperFx.R +++ b/R/helperFx.R @@ -85,6 +85,7 @@ cosineSim <- function(A,B) sum(A*B)/sqrt(sum(A^2)*sum(B^2)) #' #' @seealso \code{\link{map2symbol}} #' +#' @export findKeyType <- function(nge,annotationDB) { rownameKeytype <- "SYMBOL" @@ -122,6 +123,7 @@ findKeyType <- function(nge,annotationDB) { #' @seealso \code{\link{findKeyType}},\code{\link[AnnotationDbi]{mapIds}}, #' \code{\link{addCellMarkersToCGS}}, and \code{\link{labelCellTypes}}. 
#' +#' @export map2symbol <- function(nge,annotationDB,rownameKeytype) { if (rownameKeytype != "SYMBOL") { @@ -161,7 +163,8 @@ map2symbol <- function(nge,annotationDB,rownameKeytype) { #' #' @seealso \code{\link{findKeyType}}, \code{\link{map2symbol}}, and #' \code{\link[AnnotationDbi]{mapIds}}. -#' +#' +#' @export addCellMarkersToCGS <- function(sCV,cellMarkersU,cellMarkersS,symbolMap) { if (is.null(ClustGeneStats(sCV))) { @@ -186,7 +189,7 @@ addCellMarkersToCGS <- function(sCV,cellMarkersU,cellMarkersS,symbolMap) { } -#' Add predicted cell type names to cluster labels +#' scClustViz helper fx: Add predicted cell type names to cluster labels #' #' A bare-bones method of predicting cell types from marker genes. #' @@ -203,7 +206,8 @@ addCellMarkersToCGS <- function(sCV,cellMarkersU,cellMarkersS,symbolMap) { #' @return Returns the sCVdata object with an added attribute #' '\code{ClusterNames}' to \code{Clusters(sCV)} containing the assigned cell #' type names for each cluster. -#' +#' +#' @export labelCellTypes <- function(sCV,cellMarkers,symbolMap) { if (length(cellMarkers) < 1) { diff --git a/R/runViz.R b/R/runViz.R index 7a6990b..72dbc23 100644 --- a/R/runViz.R +++ b/R/runViz.R @@ -31,6 +31,11 @@ #' function will assume the rownames are official gene symbols. If less than #' 80% of rownames map to official gene symbols, the function will try to #' predict the appropriate keytype of the rownames (this takes a bit of time). +#' @param imageFileType Default="pdf". The file format for saved figures. One of +#' \code{"pdf"} (generated with \code{\link[grDevices]{cairo_pdf}}), +#' \code{"eps"} (generated with \code{\link[grDevices]{cairo_ps}}), +#' \code{"tiff"} (generated with \code{\link[grDevices]{tiff}}), or +#' \code{"png"} (generated with \code{\link[grDevices]{png}}). #' @param ... 
Named options that should be passed to the #' \code{\link[shiny]{runApp}} call (these can be any of the following: #' "port", "launch.browser", "host", "quiet", "display.mode" and "test.mode"). @@ -95,7 +100,9 @@ #' @export #' -runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,...) { +runShiny <- function(filePath,outPath, + cellMarkers,annotationDB,rownameKeytype, + imageFileType="pdf",...) { # ^ Load data from file ------------------------------------------------------------------ while(T) { @@ -221,6 +228,11 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. if (!missing(outPath)) { dataPath <- outPath } + if (!imageFileType %in% c("pdf","eps","png","tiff")) { + warning('imageFileType must be one of c("pdf","eps","png","tiff"). Setting to "pdf".') + imageFileType <- "pdf" + } + # ^ Helper calcs for Shiny ----------------------------------------------------- # ^^ Map rownames to gene symbol ------------------- symbolMap <- NULL @@ -304,7 +316,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. "where all nearest neighbouring clusters have differentially expressed genes", "between them), click View clusters at this resolution to proceed. If you", "want to save this cluster solution as the default for next time, click Save", - "this resolution as default. All figures can be downloaded as PDFs by clicking", + "this resolution as default. All figures can be downloaded by clicking", "the buttons next to each figure." ))), h1() @@ -329,8 +341,8 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
column(6,plotOutput("sil",height="670px")) ), fixedRow( - column(6,downloadButton("clustSepSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") + column(6,downloadButton("clustSepSave",paste("Save as",toupper(imageFileType))),align="left"), + column(6,downloadButton("silSave",paste("Save as",toupper(imageFileType))),align="right") ), hr(), @@ -372,8 +384,8 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. column(6,plotOutput("tsneMD",height="580px")) ), fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("tsneMDSave","Save as PDF")) + column(6,align="left",downloadButton("tsneSave",paste("Save as",toupper(imageFileType)))), + column(6,align="right",downloadButton("tsneMDSave",paste("Save as",toupper(imageFileType)))) ), hr(), @@ -390,8 +402,8 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. column(6,plotOutput("mdFactor",height="560px")) ), fixedRow( - column(6,align="left",downloadButton("mdScatterSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) + column(6,align="left",downloadButton("mdScatterSave",paste("Save as",toupper(imageFileType)))), + column(6,align="right",downloadButton("mdFactorSave",paste("Save as",toupper(imageFileType)))) ), hr(), @@ -431,13 +443,13 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
fixedRow( column(2,uiOutput("heatDEtype"), - numericInput("FDRthresh2",label="FDR",value=.01,max=1)), + numericInput("FDRthresh2",label="FDR",value=.05,max=1)), column(6,uiOutput("DEgeneSlider")), column(2,uiOutput("DEclustSelect")), column(2, downloadButton("CGSsave0","Download cluster gene stats"), downloadButton("deGeneSave","Download DE results"), - downloadButton("heatmapSave","Save as PDF")) + downloadButton("heatmapSave",paste("Save as",toupper(imageFileType)))) ), fixedRow(plotOutput("dotplot",height="600px")), hr(), @@ -480,7 +492,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. ), fixedRow(align="right", plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") + downloadButton("clusterGenesSave",paste("Save as",toupper(imageFileType))) ), fixedRow( column(3,radioButtons("searchType",label="Search by:", @@ -503,7 +515,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. "Include detection rate"="dr"))) ), fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") + downloadButton("geneTestSave",paste("Save as",toupper(imageFileType))) ), hr(), @@ -554,8 +566,8 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. column(6,plotOutput("goiPlot2",height="580px")) ), fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) + column(6,align="left",downloadButton("goiPlot1Save",paste("Save as",toupper(imageFileType)))), + column(6,align="right",downloadButton("goiPlot2Save",paste("Save as",toupper(imageFileType)))) ), hr(), @@ -634,7 +646,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
), fixedRow(column(12,uiOutput("diffLabelSelect"))), fixedRow( - column(4,downloadButton("setScatterSave","Save as PDF")), + column(4,downloadButton("setScatterSave",paste("Save as",toupper(imageFileType)))), column(4,downloadButton("setComparisonSave","Download DE results")) ) ) @@ -811,16 +823,20 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. }) output$clustSepSave <- downloadHandler( - filename="clustSep.pdf", + filename=paste0("clustSep.",imageFileType), content=function(file) { - pdf(file,width=7,height=6) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=6,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=6,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=6,width=7,units="in",res=600), + "png"=grDevices::png(file,height=6,width=7,units="in",res=600)) print(plot_clustSep(sCVdL=d$SCV[!grepl("^Comp:",names(d$SCV))], DEtype=input$deType, FDRthresh=input$FDRthresh1, res=input$res, Xlim=clustSepRanges$x, Ylim=clustSepRanges$y)) - dev.off() + grDevices::dev.off() } ) @@ -828,7 +844,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. output$FDRthresh1 <- renderUI({ if (input$deType %in% c("DEneighb","DEmarker")) { numericInput("FDRthresh1",label="FDR", - value=.01,max=1) + value=.05,max=1) } }) @@ -853,12 +869,16 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
}) output$silSave <- downloadHandler( - filename="sil.pdf", + filename=paste0("sil.",imageFileType), content=function(file) { if (!is.null(Silhouette(d$SCV[[input$res]]))) { - pdf(file,width=6,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=6,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=6,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=6,units="in",res=600), + "png"=grDevices::png(file,height=7,width=6,units="in",res=600)) plot_sil(d$SCV[[input$res]]) - dev.off() + grDevices::dev.off() } } ) @@ -905,10 +925,14 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. }) output$tsneSave <- downloadHandler( - filename="tsne.pdf", + filename=paste0("tsne.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_tsne(cell_coord=getEmb(inD,"tsne"), md=Clusters(d$SCV[[res()]]), md_title=NULL, @@ -926,7 +950,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. x0=temp_labels[X,1],y0=temp_labels[X,2], x1=temp_labels[temp_nn[[X]],1],y1=temp_labels[temp_nn[[X]],2])) } - dev.off() + grDevices::dev.off() } } ) @@ -1003,10 +1027,14 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
}) output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", + filename=paste0("tsneMD.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) if (length(input$tsneMDlog) > 0) { if (input$tsneMDlog == "log") { temp_log <- T } } else { temp_log <- F } @@ -1018,7 +1046,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. cell_coord=getEmb(inD,"tsne"), lab_type=input$tsneLabels), sel_cells=selCells()) - dev.off() + grDevices::dev.off() } } ) @@ -1079,17 +1107,21 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. }) output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", + filename=paste0("mdScatter.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_mdCompare(MD=d$MD, mdX=input$mdScatterX, mdY=input$mdScatterY, sel_cells=selCells(), sel_clust=selClustName(), md_log=input$scatterLog) - dev.off() + grDevices::dev.off() } } ) @@ -1131,7 +1163,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
}) output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", + filename=paste0("mdFactor.",imageFileType), content=function(file) { if (length(res()) > 0) { if (is.character(d$MD[[input$mdFactorData]]) | @@ -1144,12 +1176,16 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. temp_opts <- input$mdFactorOptsN } } - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_mdPerClust(MD=d$MD, sel=input$mdFactorData, cl=Clusters(d$SCV[[res()]]), opt=temp_opts) - dev.off() + grDevices::dev.off() } } ) @@ -1223,14 +1259,18 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. }) output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", + filename=paste0("heatmap.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=11,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=11,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=11,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=11,units="in",res=600), + "png"=grDevices::png(file,height=7,width=11,units="in",res=600)) plot_deDotplot(sCVd=d$SCV[[res()]], DEgenes=DEgenes(), DEnum=input$DEgeneCount) - dev.off() + grDevices::dev.off() } } ) @@ -1341,10 +1381,14 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
}) #,res=96) # enlarge plot features in interactive session output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", + filename=paste0("clusterGenes.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=12,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=12,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=12,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=12,units="in",res=600), + "png"=grDevices::png(file,height=7,width=12,units="in",res=600)) switch(input$cgLegend, markers=plot_clusterGenes_markers(sCVd=d$SCV[[res()]], selClust=selClust(), @@ -1362,7 +1406,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) text(.5,.5,"Whooops. input$cgLegend is making up words.") }) - dev.off() + grDevices::dev.off() } } ) @@ -1401,16 +1445,20 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. }) output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", + filename=paste0("geneBoxplot.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=12,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=12,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=12,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=12,units="in",res=600), + "png"=grDevices::png(file,height=7,width=12,units="in",res=600)) plot_GEboxplot(nge=getExpr(inD,Param(sCVdL[[1]],"assayType")), sCVd=d$SCV[[res()]], gene=input$cgGene, geneName=geneNameBx(), opts=input$bxpOpts) - dev.off() + grDevices::dev.off() } } ) @@ -1549,10 +1597,14 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
}) output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", + filename=paste0("goi1.",imageFileType), content=function(file) { if (input$plotClust2 == "goi" & !is.null(input$goi1)) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_tsne(cell_coord=getEmb(inD,"tsne"), md=getExpr(inD,Param(sCVdL[[1]],"assayType"))[input$goi1,], md_title=geneNameGOI1(), @@ -1563,15 +1615,19 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. lab_type=input$tsneLabels), "FALSE"=NULL) ) - dev.off() + grDevices::dev.off() } } ) output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", + filename=paste0("goi2.",imageFileType), content=function(file) { if (input$plotClust2 == "goi" & !is.null(input$goi2)) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_tsne(cell_coord=getEmb(inD,"tsne"), md=getExpr(inD,Param(sCVdL[[1]],"assayType"))[input$goi2,], md_title=geneNameGOI2(), @@ -1582,7 +1638,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. lab_type=input$tsneLabels), "FALSE"=NULL) ) - dev.off() + grDevices::dev.off() } } ) @@ -1670,10 +1726,14 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. 
) output$setScatterSave <- downloadHandler( - filename="setScatter.pdf", + filename=paste0("setScatter.",imageFileType), content=function(file) { if (length(res()) > 0) { - pdf(file,width=7,height=7) + switch(imageFileType, + "pdf"=grDevices::cairo_pdf(file,height=7,width=7,fallback_resolution=600), + "eps"=grDevices::cairo_ps(file,height=7,width=7,fallback_resolution=600), + "tiff"=grDevices::tiff(file,height=7,width=7,units="in",res=600), + "png"=grDevices::png(file,height=7,width=7,units="in",res=600)) plot_compareClusts(sCVd=d$SCV[[res()]], clA=input$ssA, clB=input$ssB, @@ -1682,7 +1742,7 @@ runShiny <- function(filePath,outPath,cellMarkers,annotationDB,rownameKeytype,.. labTypeDiff=input$diffLabelChoice, labNum=input$diffCount, labGenes=GOI()) - dev.off() + grDevices::dev.off() } } ) diff --git a/R/shinyModules.R b/R/shinyModules.R index c821370..d8f99a7 100644 --- a/R/shinyModules.R +++ b/R/shinyModules.R @@ -3,8 +3,54 @@ NULL # Cluster Solution DE boxplots ------------- -plot_clustSep <- function(sCVdL,DEtype,FDRthresh,res,Xlim,Ylim) { + +#' scClustViz plot: Cluster separation boxplots +#' +#' This function plots metrics of cluster solution cohesion or overfitting as a +#' function of the number of clusters found. +#' +#' @param sCVdL A named list of sCVdata objects, output of +#' \code{\link{CalcAllSCV}}. +#' @param DEtype One of "DEneighb", "DEmarker", or "silWidth". "DEneighb" shows +#' number of significantly differentially expressed genes between nearest +#' neighbouring clusters. "DEmarker" shows number of marker genes per cluster, +#' significantly positively differentially expressed genes in all pairwise +#' comparisons with other clusters. "silWidth" shows silhouette widths with +#' average silhouette width as a trace across all clustering solutions. (see +#' \code{\link[cluster]{silhouette}}). +#' @param FDRthresh Default=0.05. The false discovery rate threshold for +#' determining significance of differential gene expression. 
+#' @param res Optional. Name of cluster resolution to highlight. Must be one of +#' \code{names(sCVdL)}. +#' @param Xlim Optional. Passed to +#' \code{\link[graphics]{plot.default}(xlim=Xlim)}. +#' @param Ylim Optional. Passed to +#' \code{\link[graphics]{plot.default}(ylim=Ylim)}. +#' +#' @examples +#' \dontrun{ +#' plot_clustSep(sCVdL,DEtype="DEneighb",FDRthresh=0.05,res="res.0.8") +#' } +#' +#' @export + +plot_clustSep <- function(sCVdL,DEtype,FDRthresh=0.05,res,Xlim,Ylim) { + if (missing(Xlim)) { Xlim <- NULL } + if (missing(Ylim)) { Ylim <- NULL } + if (missing(res)) { res <- "" } + if (!res %in% c(names(sCVdL),"")) { + warning(paste(paste0("res = '",res,"' not found in cluster resolutions."), + "Cluster resolutions are names(sCVdL):", + paste(names(sCVdL),collapse=", "),sep="\n ")) + } + if (!DEtype %in% c("DEneighb","DEmarker","silWidth")) { + stop('DEtype must be one of "DEneighb", "DEmarker", or "silWidth".') + } numClust <- sapply(sCVdL,function(X) length(levels(Clusters(X)))) + for (X in unique(numClust[duplicated(numClust)])) { + numClust[numClust == X] <- seq(X-.25,X+.25,length.out=sum(numClust == X)) + } + if (is.null(Xlim)) { Xlim <- range(numClust) } bpData <- sapply(sCVdL,function(X) switch(DEtype, # DR=DEdist(X,"DR"), @@ -26,10 +72,10 @@ plot_clustSep <- function(sCVdL,DEtype,FDRthresh,res,Xlim,Ylim) { } else { par(mar=c(3,3,2,1),mgp=2:0) if (DEtype == "silWidth") { - plot(x=NA,y=NA,xlim=Xlim + c(-.5,.5),ylim=Ylim, + plot(x=NA,y=NA,xlim=Xlim + c(-.5,.5),ylim=Ylim,xaxt="n", xlab="Number of clusters",ylab="Silhouette width per cluster") } else { - plot(x=numClust,y=sapply(bpData,median),type="l", + plot(x=numClust,y=sapply(bpData,median),type="l",xaxt="n", xlim=Xlim + c(-.5,.5),ylim=Ylim,xlab="Number of clusters", ylab=switch(DEtype, # DR="Distance between clusters by gene detection rates", @@ -38,11 +84,17 @@ plot_clustSep <- function(sCVdL,DEtype,FDRthresh,res,Xlim,Ylim) { DEmarker="Positive DE genes per cluster to all other clusters", 
DEneighb="Positive DE genes per cluster to nearest cluster")) } + axis(side=3,at=seq(round(min(numClust)) - 0.5,round(max(numClust)) + 0.5,by=1), + labels=F,tick=T,pos=par("usr")[3]) + axis(side=1,at=seq(round(min(numClust)) - 0.5,round(max(numClust)) + 0.5,by=1), + labels=F,tick=T,pos=par("usr")[3]) + axis(side=1,at=seq(round(min(numClust)),round(max(numClust)),by=1),labels=T,tick=F) + abline(h=seq(0,max(unlist(bpData)),switch(as.character(diff(Ylim) > 1000), "FALSE"=10,"TRUE"=100)), lty=3,col=alpha(1,0.3)) for (i in names(bpData)[names(bpData) != res]) { - boxplot(bpData[[i]],add=T,at=numClust[i],yaxt="n") + boxplot(bpData[[i]],add=T,at=numClust[i],yaxt="n",col=alpha("white",.5)) } if (any(names(bpData) == res)) { if (DEtype == "silWidth") { @@ -71,9 +123,18 @@ plot_clustSep <- function(sCVdL,DEtype,FDRthresh,res,Xlim,Ylim) { } } -# plot_clustSep(sCVdL,DEtype="deMarker",FDRthresh=0.05,res="res.0.8",Ylim=NULL,Xlim=NULL) # Silhouette plot ------ + +#' scClustViz plot: Silhouette plot +#' +#' This function is a wrapper to \code{plot(silhouette(x))}. +#' +#' @param sCVd An \code{\link{sCVdata}} object with a non-null \code{Silhouette} +#' slot. +#' +#' @export + plot_sil <- function(sCVd) { par(mar=c(4.5,.5,1.5,1.5),mgp=2:0) plot(Silhouette(sCVd), @@ -84,7 +145,27 @@ plot_sil <- function(sCVd) { # tsnePlot ------------------- + +#' scClustViz plot element: Cluster names on cluster centroid. +#' +#' See \code{\link{plot_tsne}} for application. +#' +#' @param sCVd An sCVdata object. +#' @param cell_coord A numeric matrix where named rows are cells, and two +#' columns are the x and y dimensions of the cell embedding. +#' @param lab_type One of "ClusterNames", "ClusterNamesAll", or "Clusters". +#' "ClusterNames" places cluster names (added to sCVdata object by +#' \code{\link{labelCellTypes}}) at the centroid of all points sharing that +#' cluster name (can span clusters). "ClusterNamesAll" places cluster names at +#' the centroid of each cluster. 
"Clusters" places cluster ID +#' (\code{levels(Clusters(sCVd))}) at the centroid of each cluster. +#' +#' @export + tsne_labels <- function(sCVd,cell_coord,lab_type) { + if (!lab_type %in% c("ClusterNames","ClusterNamesAll","Clusters")) { + stop('lab_type must be one of "ClusterNames","ClusterNamesAll","Clusters"') + } if (lab_type == "ClusterNames") { temp_labelNames <- sapply(unique(attr(Clusters(sCVd),"ClusterNames")),function(X) names(which(attr(Clusters(sCVd),"ClusterNames") == X)),simplify=F) @@ -106,7 +187,60 @@ tsne_labels <- function(sCVd,cell_coord,lab_type) { return(temp_labels) } -plot_tsne <- function(cell_coord,md,md_title,md_log,label, +#' scClustViz plot: Plot cell embedding in 2D +#' +#' This function plots cells in two dimensions, with various overlays. +#' +#' @param cell_coord A numeric matrix where named rows are cells, and two +#' columns are the x and y dimensions of the cell embedding. +#' @param md The overlay information. Either a factor or numeric vector matching +#' the rows (cells) of the \code{cell_coord} matrix. If this is a factor, the +#' cells will be coloured by the factor levels. If a numeric vector, the cells +#' will be coloured using the \code{\link[viridis]{viridis}} colourscale. +#' @param md_title NULL or a character vector of one. If NULL, \code{md} is +#' assumed to be cluster assignments. Otherwise this should be the title of +#' the overlay represented by \code{md}. +#' @param md_log Default=FALSE. Logical vector of length one indicating whether +#' \code{md} should be log-transformed. Only to be used when \code{md} is +#' numeric. +#' @param label Default=NULL. The output of \code{\link{tsne_labels}} to have +#' cluster names overlaid on the plot. +#' @param sel_cells Optional. A character vector of cell names (rownames of +#' \code{cell_coord}) to highlight in the plot. +#' @param sel_cells_A Optional. Alternative highlighting method to sel_cells, +#' can be used in conjunction. 
Meant for indicating a selected set of cells +#' when building manual cell set comparisons, in conjunction with +#' \code{sel_cells_B}. +#' @param sel_cells_B Optional. See \code{sel_cells_A}. +#' +#' @examples +#' \dontrun{ +#' # Cluster overlay: +#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), +#' md=Clusters(sCVdata), +#' md_title=NULL, +#' label=tsne_labels(sCVd=sCVdata, +#' cell_coord=getEmb(input_data_obj,"tsne"), +#' lab_type="ClusterNames")) +#' +#' # Metadata overlay: +#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), +#' md=getMD(input_data_obj)$total_counts, +#' md_title="Library Size", +#' md_log=TRUE, +#' label=tsne_labels(sCVd=sCVdata, +#' cell_coord=getEmb(input_data_obj,"tsne"), +#' lab_type="ClusterNames")) +#' +#' # Gene expression overlay: +#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), +#' md=getExpr(input_data_obj,Param(sCVdata,"assayType"))["Actb",], +#' md_title="Actb") +#' } +#' +#' @export + +plot_tsne <- function(cell_coord,md,md_title,md_log=F,label=NULL, sel_cells,sel_cells_A,sel_cells_B) { if (is.null(md_title)) { id <- as.factor(md) @@ -270,7 +404,41 @@ plot_mdBoxplotY <- function(MD,sel_clust,md_log) { } +#' scClustViz plot: Plot to compare cell metadata +#' +#' This function makes scatter/boxplots comparing cellular metadata. +#' +#' @param MD A dataframe of cellular metadata. See \code{\link{getMD}}. +#' @param mdX A character vector of one referring to the variable name from +#' \code{MD} to plot on the x-axis. +#' @param mdY A character vector of one referring to the variable name from +#' \code{MD} to plot on the y-axis. +#' @param sel_cells Optional. A character vector of cell names (rownames of +#' \code{MD}) to highlight in the plot. +#' @param sel_clust Optional. The name of the selected cluster +#' (\code{sel_cells}) to include in the legend.
If +#' \code{\link{labelCellTypes}} has been run, pass the appropriate element of +#' \code{attr(Clusters(sCV),"ClusterNames")} to this argument to show both +#' cluster number and cell type label in the legend. +#' @param md_log Optional. A character vector indicating which axes should be +#' log scaled. \code{c("x","y")} to log-scale both axes. +#' +#' @examples +#' \dontrun{ +#' plot_mdCompare(MD=getMD(input_data_obj), +#' mdX="total_counts", +#' mdY="total_features", +#' sel_cells=names(Clusters(sCVdata))[Clusters(sCVdata) == "1"], +#' sel_clust="1", +#' md_log="x") +#' } +#' +#' @export + plot_mdCompare <- function(MD,mdX,mdY,sel_cells,sel_clust,md_log) { + if (missing(sel_cells)) { sel_cells <- "" } + if (missing(sel_clust)) { sel_clust <- "" } + if (missing(md_log)) { md_log <- "" } MD <- data.frame(MD[,c(mdX,mdY)]) MD$sel_cells <- rownames(MD) %in% sel_cells if ("x" %in% md_log & !(is.factor(MD[,1]) | is.character(MD[,1]))) { @@ -316,14 +484,6 @@ plot_mdCompare <- function(MD,mdX,mdY,sel_cells,sel_clust,md_log) { } -# plot_mdCompare(MD=getMD(inD), -# mdX="total_counts", -# mdY="cyclonePhases", -# sel_cells=Clusters(sCVdL$res.1.4) == "1", # rep(F,nrow(getMD(inD))), -# sel_clust=attr(Clusters(sCVdL$res.1.4),"ClusterNames")[1], -# md_log=c("x","y")) - - # ^ mdPerClust ------- plot_mdBarplot <- function(MD,opt) { temp_par <- par(no.readonly=T) @@ -364,7 +524,34 @@ plot_mdBoxplot <- function(MD,opt) { par(temp_par) } -plot_mdPerClust <- function(MD,sel,cl,opt) { + +#' scClustViz plot: Plot to view cellular metadata by cluster +#' +#' This function makes boxplots / stacked barplots of cellular metadata +#' separated by cluster. +#' +#' @param MD A dataframe of cellular metadata. See \code{\link{getMD}}. +#' @param sel A character vector of one referring to the variable name from +#' \code{MD} to plot. +#' @param cl A factor of cluster assignments. See \code{\link{Clusters}}. +#' @param opt Default="absolute". A character vector of plotting options.
One of +#' \code{"absolute"}, \code{"relative"}, or \code{"y"}. \code{"y"} +#' log-scales the data for positive numerical metadata. For categorical +#' metadata, \code{"absolute"} plots a stacked barplot of raw counts, whereas +#' \code{"relative"} plots the proportion of each cluster represented by each +#' category. +#' +#' @examples +#' \dontrun{ +#' plot_mdPerClust(MD=getMD(input_data_obj), +#' sel="cyclonePhases", +#' cl=Clusters(sCVdata), +#' opt="relative") +#' } +#' +#' @export + +plot_mdPerClust <- function(MD,sel,cl,opt="absolute") { MD <- MD[sel] MD$cl <- cl if ("y" %in% opt & !(is.factor(MD[,1]) | is.character(MD[,1]))) { @@ -384,10 +571,32 @@ plot_mdPerClust <- function(MD,sel,cl,opt) { } } -# plot_mdPerClust(MD=getMD(inD),sel="cyclonePhases",cl=Clusters(sCVdL$res.0.8),opt="absolute") - # DE gene dotplot ----------- + +#' scClustViz plot helper function: Return DE genes per cluster +#' +#' This function returns a named list (one entry per cluster) of named numeric +#' vectors of FDR-corrected p-values for statistically significant +#' differentially expressed genes for a set comparison type and FDR threshold. +#' For \code{"DEmarker"}, the returned value is the max of all comparisons. +#' +#' @param sCVd The sCVdata object. +#' @param DEtype One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}}; +#' \code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see +#' \code{\link{DEmarker}}. +#' @param FDRthresh A numeric vector of length 1 setting a false discovery rate +#' threshold for statistical significance. +#' +#' @examples +#' \dontrun{ +#' dotplotDEgenes(sCVdata, +#' DEtype="DEneighb", +#' FDRthresh=0.01) +#' } +#' +#' @export + dotplotDEgenes <- function(sCVd,DEtype,FDRthresh) { if (missing(FDRthresh)) { FDRthresh <- 1 } if (DEtype == "DEvsRest") { @@ -415,10 +624,35 @@ dotplotDEgenes <- function(sCVd,DEtype,FDRthresh) { return(outL) } } -# returns A named list of named numeric vectors, one entry for each cluster.
-# Names are genes passing FDR threshold, and value is (maximum) FDR of comparison. +#' scClustViz plot: Plot gene expression dotplots. +#' +#' This function makes dotplots (a heatmap analogue) showing gene expression for +#' a set of genes across all clusters. +#' +#' When generated in an interactive context (e.g. RStudio), this can sometimes +#' result in a \code{figure margins too large} error. See example for suggested +#' dimensions of the graphic device. +#' +#' @param sCVd The sCVdata object. +#' @param DEgenes The output of \code{\link{dotplotDEgenes}}. +#' @param DEnum Single integer representing the maximum number of DE genes per +#' cluster to include in the dotplot. +#' +#' @examples +#' \dontrun{ +#' pdf("filepath.pdf",width=11,height=7) +#' plot_deDotplot(sCVd=sCVdata, +#' DEgenes=dotplotDEgenes(sCVdata, +#' DEtype="DEneighb", +#' FDRthresh=0.01), +#' DEnum=5) +#' dev.off() +#' } +#' +#' @export + plot_deDotplot <- function(sCVd,DEgenes,DEnum) { # ^ Setup ---- heatGenes <- unique(unlist(lapply(DEgenes,function(X) names(X)[1:DEnum]))) @@ -480,7 +714,11 @@ plot_deDotplot <- function(sCVd,DEgenes,DEnum) { return(X) }) - tempLabCol <- ClustGeneStats(sCVd)[[1]][heatGenes,"genes"] + if ("genes" %in% names(ClustGeneStats(sCVd)[[1]])) { + tempLabCol <- ClustGeneStats(sCVd)[[1]][heatGenes,"genes"] + } else { + tempLabCol <- rownames(ClustGeneStats(sCVd)[[1]][heatGenes,]) + } DR <- temp_DR[hG$order,hC$order,drop=F] temp <- range(sapply(ClustGeneStats(sCVd),function(X) X[,"MDGE"])) temp <- seq(temp[1],temp[2],length.out=101) @@ -575,6 +813,60 @@ singleDot <- function(col1){ } +#' scClustViz plot: Plot within-cluster gene expression highlighting marker +#' genes +#' +#' This function makes a scatterplot of gene detection rate vs. mean detected +#' gene abundance, highlighting genes identified as cell type specific markers +#' by the user.
\strong{This function will not work unless +#' \code{\link{addCellMarkersToCGS}} has been run on the sCVdata object prior.} +#' +#' @param sCVd The sCVdata object. +#' @param selClust A named character vector representing the cluster to be +#' displayed. If \code{\link{labelCellTypes}} has been run, pass the +#' appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +#' argument to show both cluster number and cell type label in the legend. +#' @param cellMarkersU Derived from the \code{cellMarkers} argument to +#' \code{\link{runShiny}}. A list of the unique gene symbols for each cell +#' type in \code{cellMarkers}. +#' @param cellMarkersS Derived from the \code{cellMarkers} argument to +#' \code{\link{runShiny}}. A list of the gene symbols common to two or more +#' cell types in \code{cellMarkers}. Each entry is named for the indices of +#' \code{cellMarkers} that share the gene. +#' +#' @examples +#' \dontrun{ +#' cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6", +#' "Pcna","Nes","Cux1","Cux2"), +#' "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6", +#' "Tubb3","Rbfox3","Dcx"), +#' "Cajal-Retzius neurons"="Reln", +#' "Intermediate progenitors"="Eomes", +#' "Projection neurons"=c("Tbr1","Satb2","Fezf2", +#' "Bcl11b","Tle4","Nes", +#' "Cux1","Cux2","Tubb3", +#' "Rbfox3","Dcx") +#' ) +#' cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2, +#' function(X) do.call(intersect,unname(cellMarkers[X]))) +#' try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2, +#' function(X) paste(X,collapse="&")),silent=T) +#' cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] +#' cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) +#' sCVdata <- addCellMarkersToCGS(sCVdata, +#' cellMarkersU=cellMarkersU, +#' cellMarkersS=cellMarkersS, +#' symbolMap=NULL) +#' +#' pdf("filepath.pdf",width=12,height=7) +#' plot_clusterGenes_markers(sCVd=sCVdata, +#' selClust="1", +#' cellMarkersS=cellMarkersS, +#'
cellMarkersU=cellMarkersU) +#' dev.off() +#' } +#' +#' @export plot_clusterGenes_markers <- function(sCVd,selClust,cellMarkersS,cellMarkersU) { cellMarkCols <- rainbow2(length(cellMarkersU)) @@ -619,9 +911,9 @@ plot_clusterGenes_markers <- function(sCVd,selClust,cellMarkersS,cellMarkersU) { col2=cellMarkCols[as.integer(temp[2])])) } tempLabels <- spreadLabels2(CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"DR"], - CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"MDGE"], - CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"genes"], - str.cex=1.2,str.font=2) + CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"MDGE"], + CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"genes"], + str.cex=1.2,str.font=2) rownames(tempLabels) <- CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"genes"] for (gn in CGS[CGS$cMu & CGS$overCut,"genes"]) { rect(xleft=tempLabels[gn,1] - strwidth(gn,cex=1.2,font=2) * .5, @@ -648,6 +940,38 @@ plot_clusterGenes_markers <- function(sCVd,selClust,cellMarkersS,cellMarkersU) { } +#' scClustViz plot: Plot within-cluster gene expression highlighting DE genes +#' +#' This function makes a scatterplot of gene detection rate vs. mean detected +#' gene abundance, highlighting differentially expressed genes. +#' +#' @param sCVd The sCVdata object. +#' @param selClust A named character vector representing the cluster to be +#' displayed. If \code{\link{labelCellTypes}} has been run, pass the +#' appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +#' argument to show both cluster number and cell type label in the legend. +#' @param DEgenes The output of \code{\link{dotplotDEgenes}}. +#' @param DEnum Single integer representing the maximum number of DE genes per +#' cluster to include in the plot. +#' @param DEtype One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}}; +#' \code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see +#' \code{\link{DEmarker}}. 
+#' +#' @examples +#' \dontrun{ +#' pdf("filepath.pdf",width=12,height=7) +#' plot_clusterGenes_DEgenes(sCVd=sCVdata, +#' selClust="1", +#' DEgenes=dotplotDEgenes(sCVdata, +#' DEtype="DEneighb", +#' FDRthresh=0.01), +#' DEnum=5, +#' DEtype="DEneighb") +#' dev.off() +#' } +#' +#' @export + plot_clusterGenes_DEgenes <- function(sCVd,selClust,DEgenes,DEnum,DEtype) { par(mar=c(3,3,3,20),mgp=2:0) if (selClust == "") { @@ -675,29 +999,32 @@ plot_clusterGenes_DEgenes <- function(sCVd,selClust,DEgenes,DEnum,DEtype) { DEG <- DEG[!is.na(DEG)] points(x=CGS[DEG,"DR"],y=CGS[DEG,"MDGE"], pch=16,cex=1.2,col="firebrick2") + if (!"overCut" %in% names(CGS)) { CGS$overCut <- T } if (any(CGS[DEG,"overCut"])) { - tempLabels <- spreadLabels2(x=CGS[DEG,"DR"][CGS[DEG,"overCut"]], - y=CGS[DEG,"MDGE"][CGS[DEG,"overCut"]], - label=CGS[DEG,"genes"][CGS[DEG,"overCut"]], - str.cex=1.2,str.font=2) + labelDF <- CGS[DEG,] + labelDF <- labelDF[labelDF$overCut,] + if (!"genes" %in% names(labelDF)) { labelDF$genes <- rownames(labelDF) } + tempLabels <- spreadLabels2(x=labelDF$DR,y=labelDF$MDGE, + label=labelDF$genes, + str.cex=1.2,str.font=2) rect(xleft=tempLabels[,1] - - strwidth(CGS[DEG,"genes"][CGS[DEG,"overCut"]],cex=1.2,font=2) * .5, + strwidth(labelDF$genes,cex=1.2,font=2) * .5, xright=tempLabels[,1] + - strwidth(CGS[DEG,"genes"][CGS[DEG,"overCut"]],cex=1.2,font=2) * .5, + strwidth(labelDF$genes,cex=1.2,font=2) * .5, ybottom=tempLabels[,2] - - strheight(CGS[DEG,"genes"][CGS[DEG,"overCut"]],cex=1.2,font=2) * .5, + strheight(labelDF$genes,cex=1.2,font=2) * .5, ytop=tempLabels[,2] + - strheight(CGS[DEG,"genes"][CGS[DEG,"overCut"]],cex=1.2,font=2) * .5, + strheight(labelDF$genes,cex=1.2,font=2) * .5, border=NA,col=alpha("white",0.5)) text(tempLabels,cex=1.2,font=2,col="firebrick2", - labels=CGS[DEG,"genes"][CGS[DEG,"overCut"]]) + labels=labelDF$genes) } } temp_n <- length(DEgenes[[selClust]]) temp_lab <- switch(DEtype, DEvsRest=" DE genes vs rest of cells in sample", DEmarker=" marker genes", - 
deNeighb=" DE genes vs nearest neighbouring cluster") + DEneighb=" DE genes vs nearest neighbouring cluster") legend("top",bty="n",pch=16,col="firebrick2", legend=paste0(temp_n,temp_lab," (showing top ", min(temp_n,DEnum),")")) @@ -705,6 +1032,30 @@ plot_clusterGenes_DEgenes <- function(sCVd,selClust,DEgenes,DEnum,DEtype) { } } + +#' scClustViz plot: Plot within-cluster gene expression highlighting selected genes +#' +#' This function makes a scatterplot of gene detection rate vs. mean detected +#' gene abundance, highlighting specified genes. +#' +#' @param sCVd The sCVdata object. +#' @param selClust A named character vector representing the cluster to be +#' displayed. If \code{\link{labelCellTypes}} has been run, pass the +#' appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +#' argument to show both cluster number and cell type label in the legend. +#' @param GOI A character vector of gene names to highlight. +#' +#' @examples +#' \dontrun{ +#' pdf("filepath.pdf",width=12,height=7) +#' plot_clusterGenes_search(sCVd=sCVdata, +#' selClust="1", +#' GOI=c("Actb","Sox2")) +#' dev.off() +#' } +#' +#' @export + plot_clusterGenes_search <- function(sCVd,selClust,GOI) { par(mar=c(3,3,3,20),mgp=2:0) if (selClust == "") { @@ -714,11 +1065,12 @@ plot_clusterGenes_search <- function(sCVd,selClust,GOI) { "to see gene expression for that cluster.",sep="\n")) } else { CGS <- ClustGeneStats(sCVd)[[selClust]] + if (!"genes" %in% names(CGS)) { CGS$genes <- rownames(CGS) } temp_ylab <- switch(as.character(Param(sCVd,"exponent") == exp(1)), "TRUE"="(natural log scale)", "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)")) plot(MDGE~DR, - data=CGS[!rownames(CGS) %in% GOI,], + data=CGS[!CGS$genes %in% GOI,], col=alpha("black",0.2),pch=20, xlim=range(CGS$DR),ylim=range(CGS$MDGE), xlab="Proportion of cells in which gene was detected", @@ -727,30 +1079,32 @@ plot_clusterGenes_search <- function(sCVd,selClust,GOI) { mtext(paste("Cells:",sum(Clusters(sCVd) == 
selClust), " Genes detected:",sum(CGS$DR > 0)),side=3,line=0,cex=0.9) box(col=rainbow2(length(levels(Clusters(sCVd))))[selClust],lwd=2) + GOI <- GOI[GOI %in% CGS$genes] if (length(GOI) > 0) { points(x=CGS[GOI,"DR"],y=CGS[GOI,"MDGE"], pch=16,cex=1.2,col="firebrick2") - if (any(CGS[GOI,"overCut"])) { - tempLabels <- spreadLabels2(x=CGS[GOI,"DR"][CGS[GOI,"overCut"]], - y=CGS[GOI,"MDGE"][CGS[GOI,"overCut"]], - label=CGS[GOI,"genes"][CGS[GOI,"overCut"]], - str.cex=1.2,str.font=2) - rect(xleft=tempLabels[,1] - - strwidth(CGS[GOI,"genes"][CGS[GOI,"overCut"]],cex=1.2,font=2) * .5, - xright=tempLabels[,1] + - strwidth(CGS[GOI,"genes"][CGS[GOI,"overCut"]],cex=1.2,font=2) * .5, - ybottom=tempLabels[,2] - - strheight(CGS[GOI,"genes"][CGS[GOI,"overCut"]],cex=1.2,font=2) * .5, - ytop=tempLabels[,2] + - strheight(CGS[GOI,"genes"][CGS[GOI,"overCut"]],cex=1.2,font=2) * .5, - border=NA,col=alpha("white",0.5)) - text(tempLabels,cex=1.2,font=2,col="firebrick2", - labels=CGS[GOI,"genes"][CGS[GOI,"overCut"]]) - } + + labelDF <- CGS[GOI,] + if (!"genes" %in% names(labelDF)) { labelDF$genes <- rownames(labelDF) } + tempLabels <- spreadLabels2(x=labelDF$DR,y=labelDF$MDGE, + label=labelDF$genes, + str.cex=1.2,str.font=2) + rect(xleft=tempLabels[,1] - + strwidth(labelDF$genes,cex=1.2,font=2) * .5, + xright=tempLabels[,1] + + strwidth(labelDF$genes,cex=1.2,font=2) * .5, + ybottom=tempLabels[,2] - + strheight(labelDF$genes,cex=1.2,font=2) * .5, + ytop=tempLabels[,2] + + strheight(labelDF$genes,cex=1.2,font=2) * .5, + border=NA,col=alpha("white",0.5)) + text(tempLabels,cex=1.2,font=2,col="firebrick2", + labels=labelDF$genes) } } } + # Gene search function ------- geneSearch <- function(txt,st,CGS) { if (length(txt) < 1) { txt <- ""} @@ -761,14 +1115,15 @@ geneSearch <- function(txt,st,CGS) { temp_in <- strsplit(txt,split="[\\s,]",perl=T)[[1]] temp_out <- geneNames[toupper(temp_in)] names(temp_out) <- CGS[temp_out,"genes"] - return(temp_out) + temp_out }, regex={ temp_in <- 
grep(txt,names(geneNames),ignore.case=T) temp_out <- geneNames[temp_in] names(temp_out) <- CGS[temp_out,"genes"] - return(temp_out) + temp_out }) + temp <- temp[!is.na(temp)] if (length(temp) > 0) { return(temp) } else { @@ -781,8 +1136,35 @@ geneSearch <- function(txt,st,CGS) { } } + # Gene expression boxplots -------- -plot_GEboxplot <- function(nge,sCVd,gene,geneName,opts) { + +#' scClustViz plot: Compare gene expression across clusters +#' +#' This function generates boxplots comparing normalized gene abundance across +#' all clusters. +#' +#' @param nge The gene expression matrix, see \code{\link{getExpr}}. +#' @param sCVd The sCVdata object. +#' @param gene The gene to display. +#' @param geneName Optional. A named character vector of length one. The element +#' is the full gene name, and the name is the gene symbol. +#' @param opts Default=\code{c("sct","dr")}. A character vector with plotting +#' options. If it includes \code{"sct"}, data points will be overlaid as a +#' jitter over the boxplot. If it includes \code{"dr"}, detection rate per +#' cluster will be plotted as a small black bar over each boxplot, with the +#' corresponding axis on the right.
+#' +#' @examples +#' \dontrun{ +#' plot_GEboxplot(getExpr(input_data_obj), +#' sCVd=sCVdata, +#' gene="Actb") +#' } +#' +#' @export + +plot_GEboxplot <- function(nge,sCVd,gene,geneName,opts=c("sct","dr")) { if (gene == "") { plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) text(.5,.5,paste("Select a gene by either clicking on the plot above", @@ -815,6 +1197,7 @@ plot_GEboxplot <- function(nge,sCVd,gene,geneName,opts) { )) mtext(levels(Clusters(sCVd))[temp_pos],side=1,line=0,at=seq_along(temp_pos)) mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) + if (missing(geneName)) { geneName <- NULL } if (is.null(geneName)) { mtext(paste(gene,collapse="\n"),side=1,line=2,font=2) } else { @@ -923,7 +1306,7 @@ plot_compareClusts_MAplot <- function(sCVd,clA,clB,dataType,labType,labNum,labGe if (length(labGenes) > 0) { points(y_mean~x_diff,data=CGS[labGenes,],pch=16,col=alpha("firebrick2",0.8)) tempLabel <- spreadLabels2(x=CGS[labGenes,"x_diff"],y=CGS[labGenes,"y_mean"], - label=labGenes,str.cex=1.2,str.font=2) + label=labGenes,str.cex=1.2,str.font=2) text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2) } } else { @@ -932,7 +1315,7 @@ plot_compareClusts_MAplot <- function(sCVd,clA,clB,dataType,labType,labNum,labGe points(y_mean~x_diff,data=CGS[gnB,],pch=16, col=rainbow2(length(levels(Clusters(sCVd))),.8)[which(levels(Clusters(sCVd)) == clB)]) tempLabel <- spreadLabels2(x=CGS[c(gnA,gnB),"x_diff"],y=CGS[c(gnA,gnB),"y_mean"], - label=c(gnA,gnB),str.cex=1.2,str.font=2) + label=c(gnA,gnB),str.cex=1.2,str.font=2) rownames(tempLabel) <- c(gnA,gnB) text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2, col=rainbow2(length(levels(Clusters(sCVd))))[which(levels(Clusters(sCVd)) == clA)]) @@ -959,6 +1342,7 @@ plot_compareClusts_DEscatter <- function(sCVd,clA,clB,dataType,labType, labTypeDiff,labNum,labGenes) { # ^ setup ----- CGS <- compareClusts_DF(sCVd,clA,clB,dataType) + labGenes <- labGenes[labGenes %in% rownames(CGS)] temp_exp <- 
switch(as.character(Param(sCVd,"exponent") == exp(1)), "TRUE"="(natural log scale)", "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)")) @@ -995,7 +1379,7 @@ plot_compareClusts_DEscatter <- function(sCVd,clA,clB,dataType,labType, if (length(labGenes) > 0) { points(logGER~dDR,data=CGS[labGenes,],pch=16,col=alpha("firebrick2",0.8)) tempLabel <- spreadLabels2(CGS[labGenes,"dDR"],CGS[labGenes,"logGER"], - label=labGenes,str.cex=1.2,str.font=2) + label=labGenes,str.cex=1.2,str.font=2) text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2) } } else { @@ -1004,7 +1388,7 @@ plot_compareClusts_DEscatter <- function(sCVd,clA,clB,dataType,labType, points(logGER~dDR,data=CGS[gnB,],pch=16, col=rainbow2(length(levels(Clusters(sCVd))),.8)[which(levels(Clusters(sCVd)) == clB)]) tempLabel <- spreadLabels2(CGS[c(gnA,gnB),"dDR"],CGS[c(gnA,gnB),"logGER"], - label=c(gnA,gnB),str.cex=1.2,str.font=2) + label=c(gnA,gnB),str.cex=1.2,str.font=2) rownames(tempLabel) <- c(gnA,gnB) text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2, col=rainbow2(length(levels(Clusters(sCVd))))[which(levels(Clusters(sCVd)) == clA)]) @@ -1022,6 +1406,7 @@ plot_compareClusts_volcano <- function(sCVd,clA,clB,dataType,labType,labNum,labG CGS <- compareClusts_DF(sCVd,clA,clB,dataType) CGS <- CGS[!is.na(CGS$FDR),] CGS$FDR <- -log10(CGS$FDR) + labGenes <- labGenes[labGenes %in% rownames(CGS)] temp_exp <- switch(as.character(Param(sCVd,"exponent") == exp(1)), "TRUE"="(natural log scale)", "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)")) @@ -1052,7 +1437,7 @@ plot_compareClusts_volcano <- function(sCVd,clA,clB,dataType,labType,labNum,labG points(x=CGS[labGenes,dataType],y=CGS[labGenes,"FDR"], pch=16,col=alpha("firebrick2",0.8)) tempLabel <- spreadLabels2(x=CGS[labGenes,dataType],y=CGS[labGenes,"FDR"], - label=labGenes,str.cex=1.2,str.font=2) + label=labGenes,str.cex=1.2,str.font=2) text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2) } } else { @@ -1061,7 +1446,7 @@ 
plot_compareClusts_volcano <- function(sCVd,clA,clB,dataType,labType,labNum,labG points(CGS[gnB,dataType],y=CGS[gnB,"FDR"],pch=16, col=rainbow2(length(levels(Clusters(sCVd))),.8)[which(levels(Clusters(sCVd)) == clB)]) tempLabel <- spreadLabels2(CGS[c(gnA,gnB),dataType],CGS[c(gnA,gnB),"FDR"], - label=c(gnA,gnB),str.cex=1.2,str.font=2) + label=c(gnA,gnB),str.cex=1.2,str.font=2) rownames(tempLabel) <- c(gnA,gnB) text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2, col=rainbow2(length(levels(Clusters(sCVd))))[which(levels(Clusters(sCVd)) == clA)]) @@ -1075,7 +1460,48 @@ plot_compareClusts_volcano <- function(sCVd,clA,clB,dataType,labType,labNum,labG } -plot_compareClusts <- function(sCVd,clA,clB,dataType,labType,labTypeDiff,labNum,labGenes) { +#' scClustViz plot: Volcano and MA-style plots to compare clusters +#' +#' This function generates scatterplots inspired by volcano and MA plots for +#' comparing gene expression between pairs of clusters. +#' +#' @param sCVd The sCVdata object. +#' @param clA Cluster identifier for side A of the comparison. +#' @param clB Cluster identifier for side B of the comparison. +#' @param dataType For MA-style plots comparing difference and mean of gene +#' summary statistics, one of: \code{"DR"} (detection rate); \code{"MGE"} +#' (mean gene expression); \code{"MDGE"} (mean detected gene expression). For +#' volcano plots, the effect size measure can be one of: \code{"dDR"} +#' (difference in detection rate); \code{"logGER"} (log gene expression +#' ratio). To compare relationship between difference in detection rate and +#' log gene expression ratio, use \code{"GERvDDR"}. +#' @param labType Default="de". A character vector indicating which genes to +#' highlight. One of \code{"de"} (most statistically significant genes), +#' \code{"diff"} (most different by dataType shown), or \code{"search"} +#' (specified genes). +#' @param labGenes Only required if \code{labType="search"}. Gene names to +#' highlight. +#' @param labNum Default=5. 
Number of genes to highlight per side. +#' @param labTypeDiff Default="logGER". Only required if +#' \code{dataType="GERvDDR"} and \code{labType="diff"}. Which axis to use for +#' difference calculation. One of \code{"dDR"} (difference in detection rate) +#' or \code{"logGER"} (log gene expression ratio). +#' +#' @examples +#' \dontrun{ +#' plot_compareClusts(sCVdata, +#' clA="1", +#' clB="2", +#' dataType="GERvDDR", +#' labType="search", +#' labGenes="Actb") +#' } +#' +#' @export + +plot_compareClusts <- function(sCVd,clA,clB,dataType, + labType="de",labGenes, + labNum=5,labTypeDiff="logGER") { if (clA %in% levels(Clusters(sCVd)) & clB %in% levels(Clusters(sCVd))) { if (dataType %in% c("MGE","MDGE","DR")) { diff --git a/man/CalcAllSCV.Rd b/man/CalcAllSCV.Rd index 6689dc9..c9b8980 100644 --- a/man/CalcAllSCV.Rd +++ b/man/CalcAllSCV.Rd @@ -7,7 +7,7 @@ CalcAllSCV(inD, clusterDF, assayType = "", DRforClust = "pca", exponent = 2, pseudocount = 1, DRthresh = 0.1, testAll = TRUE, FDRthresh = 0.05, storeAllDE = T, calcSil = T, calcDEvsRest = T, - calcDEcombn = T) + calcDEcombn = T, UseBiocParallel = F) } \arguments{ \item{inD}{The input dataset. An object of class \code{\link[Seurat]{seurat}} @@ -98,6 +98,9 @@ perform DE testing on the same set of comparisons using a statistical method of your choice. This can be passed into your \code{sCVdata} objects in the list returned by \code{CalcAllSCV} using the function \code{\link{calcDEcombn}}. See function documentation for details.} + +\item{UseBiocParallel}{Default = FALSE. Very experimental implementation of +BiocParallel for calculations. Not recommended.} } \value{ The function returns a list containing \code{\link{sCVdata}} objects @@ -132,7 +135,7 @@ your_cluster_columns <- grepl("res[.0-9]+$", names(getMD(your_scRNAseq_data_object))) # ^ Finds the cluster columns of the metadata in a Seurat object. 
-your_cluster_results <- getMD(your_scRNAseq_data_object)[,your_cluster_columns] +your_cluster_results <- getMD(your_scRNAseq_data_object)[your_cluster_columns] sCVdata_list <- CalcAllSCV(inD=your_scRNAseq_data_object, clusterDF=your_cluster_results, diff --git a/man/CalcCGS.Rd b/man/CalcCGS.Rd index b321cd2..463b586 100644 --- a/man/CalcCGS.Rd +++ b/man/CalcCGS.Rd @@ -6,9 +6,9 @@ \alias{CalcCGS,sCVdata-method} \title{Calculate cluster-wise gene statistics for sCVdata} \usage{ -CalcCGS(sCVd, inD) +CalcCGS(sCVd, inD, UseBiocParallel) -\S4method{CalcCGS}{sCVdata}(sCVd, inD) +\S4method{CalcCGS}{sCVdata}(sCVd, inD, UseBiocParallel) } \arguments{ \item{sCVd}{An sCVdata object.} @@ -18,6 +18,9 @@ or \code{\link[SingleCellExperiment]{SingleCellExperiment}}. Other data classes are not currently supported. \href{https://github.com/BaderLab/scClustViz/issues}{Please submit requests for other data objects here!}} + +\item{UseBiocParallel}{Default = FALSE. Very experimental implementation of +BiocParallel for calculations. Not recommended.} } \value{ The function returns a list of dataframes. Each list element contains diff --git a/man/CalcDEcombn.Rd b/man/CalcDEcombn.Rd index cbcaa5e..6e46b10 100644 --- a/man/CalcDEcombn.Rd +++ b/man/CalcDEcombn.Rd @@ -6,9 +6,9 @@ \alias{CalcDEcombn,sCVdata-method} \title{Performs DE testing between pairs of clusters in sCVdata} \usage{ -CalcDEcombn(sCVd, inD, storeAllDE) +CalcDEcombn(sCVd, inD, storeAllDE, UseBiocParallel) -\S4method{CalcDEcombn}{sCVdata}(sCVd, inD, storeAllDE) +\S4method{CalcDEcombn}{sCVdata}(sCVd, inD, storeAllDE, UseBiocParallel) } \arguments{ \item{sCVd}{An sCVdata object.} @@ -24,6 +24,9 @@ calculate and store effect size information for all genes in the comparison (TRUE), or just those passing the detection rate threshold for the Wilcoxon rank-sum test (FALSE). Setting this to FALSE will reduce the size of the output sCVdata object.} + +\item{UseBiocParallel}{Default = FALSE. 
Very experimental implementation of +BiocParallel for calculations. Not recommended.} } \value{ A named list of data frames, one entry for each pairwise combination diff --git a/man/CalcDEvsRest.Rd b/man/CalcDEvsRest.Rd index 8ff6410..7efc564 100644 --- a/man/CalcDEvsRest.Rd +++ b/man/CalcDEvsRest.Rd @@ -6,9 +6,9 @@ \alias{CalcDEvsRest,sCVdata-method} \title{Calculates one vs. all DE tests for sCVdata} \usage{ -CalcDEvsRest(sCVd, inD, storeAllDE) +CalcDEvsRest(sCVd, inD, storeAllDE, UseBiocParallel) -\S4method{CalcDEvsRest}{sCVdata}(sCVd, inD, storeAllDE) +\S4method{CalcDEvsRest}{sCVdata}(sCVd, inD, storeAllDE, UseBiocParallel) } \arguments{ \item{sCVd}{An sCVdata object.} @@ -24,6 +24,9 @@ calculate and store effect size information for all genes in the comparison (TRUE), or just those passing the detection rate threshold for the Wilcoxon rank-sum test (FALSE). Setting this to FALSE will reduce the size of the output sCVdata object.} + +\item{UseBiocParallel}{Default = FALSE. Very experimental implementation of +BiocParallel for calculations. Not recommended.} } \value{ A named list of data frames, one entry for each level in diff --git a/man/CalcSCV.Rd b/man/CalcSCV.Rd index bbca561..0b9840a 100644 --- a/man/CalcSCV.Rd +++ b/man/CalcSCV.Rd @@ -6,7 +6,7 @@ \usage{ CalcSCV(inD, cl, assayType = "", DRforClust = "pca", exponent = 2, pseudocount = 1, DRthresh = 0.1, storeAllDE = T, calcSil = T, - calcDEvsRest = T, calcDEcombn = T) + calcDEvsRest = T, calcDEcombn = T, UseBiocParallel = F) } \arguments{ \item{inD}{The input dataset. An object of class \code{\link[Seurat]{seurat}} @@ -81,6 +81,9 @@ perform DE testing on the same set of comparisons using a statistical method of your choice. This can be passed into your \code{sCVdata} objects in the list returned by \code{CalcAllSCV} using the function \code{\link{calcDEcombn}}. See function documentation for details.} + +\item{UseBiocParallel}{Default = FALSE. 
Very experimental implementation of +BiocParallel for calculations. Not recommended.} } \value{ The function returns an \code{\link{sCVdata}} object with all slots diff --git a/man/dotplotDEgenes.Rd b/man/dotplotDEgenes.Rd new file mode 100644 index 0000000..2c81d8f --- /dev/null +++ b/man/dotplotDEgenes.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{dotplotDEgenes} +\alias{dotplotDEgenes} +\title{scClustViz plot helper function: Return DE genes per cluster} +\usage{ +dotplotDEgenes(sCVd, DEtype, FDRthresh) +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{DEtype}{One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}}; +\code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see +\code{\link{DEmarker}}.} + +\item{FDRthresh}{A numeric vector of length 1 setting a false discovery rate +threshold for statistical significance.} +} +\description{ +This function returns a named numeric vector of FDR-corrected p-values for +statistically significant differentially expressed genes for a set comparison +type and FDR threshold. For \code{"DEmarker"}, the returned value is the max +of all comparisons. +} +\examples{ +\dontrun{ +dotplotDEgenes(sCVdata, + DEtype="DEneighb", + FDRthresh=0.01) +} + +} diff --git a/man/fx_calcCGS_BP.Rd b/man/fx_calcCGS_BP.Rd new file mode 100644 index 0000000..74855a1 --- /dev/null +++ b/man/fx_calcCGS_BP.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deTest.R +\name{fx_calcCGS_BP} +\alias{fx_calcCGS_BP} +\title{Internal fx for cluster-wise gene statistics using BiocParallel} +\usage{ +fx_calcCGS_BP(nge, cl, exponent, pseudocount) +} +\arguments{ +\item{nge}{The log-normalized gene expression matrix.} + +\item{cl}{The factor with cluster assignments per cell (column of nge).} + +\item{exponent}{The log base of your normalized input data. 
Seurat +normalization uses the natural log (set this to exp(1)), while other +normalization methods generally use log2 (set this to 2).} + +\item{pseudocount}{The pseudocount added to all log-normalized values in your +input data. Most methods use a pseudocount of 1 to eliminate log(0) errors.} +} +\value{ +The function returns a list of dataframes. Each list element contains + a named list of clusters at that resolution. Each of those list elements + contains a dataframe of three variables, where each sample is a gene. + \code{DR} is the proportion of cells in the cluster in which that gene was + detected. \code{MDGE} is mean normalized gene expression for that gene in + only the cells in which it was detected (see \code{\link{meanLogX}} for + mean calculation). \code{MGE} is the mean normalized gene expression for + that gene in all cells of the cluster (see \code{\link{meanLogX}} for mean + calculation). +} +\description{ +Internal function. See \code{\link{CalcCGS}}. +} diff --git a/man/fx_calcDEcombn_BP.Rd b/man/fx_calcDEcombn_BP.Rd new file mode 100644 index 0000000..1b5f776 --- /dev/null +++ b/man/fx_calcDEcombn_BP.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deTest.R +\name{fx_calcDEcombn_BP} +\alias{fx_calcDEcombn_BP} +\title{Internal fx to calculate DE between combinations of clusters using BiocParallel} +\usage{ +fx_calcDEcombn_BP(nge, cl, deMes) +} +\arguments{ +\item{nge}{The log-normalized gene expression matrix.} + +\item{cl}{The factor with cluster assignments per cell (column of nge).} + +\item{deMes}{The output from \code{\link{fx_calcEScombn}}.} +} +\value{ +Differential testing results from Wilcoxon rank sum tests comparing a + gene in each cluster to that gene in every other cluster in a series of + tests. The results are stored as a nested list of dataframes. Each list + element contains a named list of clusters (cluster A). 
Each of those lists + contains a named list of all the other clusters (cluster B). Each of those + list elements contains a dataframe of four variables, where each sample is + a gene. \code{dDR} is the difference in detection rate of that gene between + the two clusters (DR[A] - DR[B]). \code{logGER} is the log gene expression + ratio calculated by taking the difference in mean expression of the gene + (see \code{\link{meanLogX}} for mean calculation) between the two clusters + (MGE[A] - MGE[B]). \code{Wstat} and \code{pVal} are the test statistic and + the p-value of the Wilcoxon rank sum test. \code{FDR} is the false + discovery rate-corrected p-value of the test. +} +\description{ +Internal function. See \code{\link{CalcDEcombn}}. +} +\details{ +Calculates Wilcoxon rank-sum tests for all genes in each of the potential +combinations of clusters to compare. +} diff --git a/man/fx_calcDEvsRest_BP.Rd b/man/fx_calcDEvsRest_BP.Rd new file mode 100644 index 0000000..5ae3fa5 --- /dev/null +++ b/man/fx_calcDEvsRest_BP.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deTest.R +\name{fx_calcDEvsRest_BP} +\alias{fx_calcDEvsRest_BP} +\title{Internal fx to perform one vs all DE testing using BiocParallel} +\usage{ +fx_calcDEvsRest_BP(nge, cl, deTes) +} +\arguments{ +\item{nge}{The log-normalized gene expression matrix.} + +\item{cl}{The factor with cluster assignments per cell (column of nge).} + +\item{deTes}{The output from \code{\link{fx_calcESvsRest}}.} +} +\value{ +Differential testing results from Wilcoxon rank sum tests comparing a + gene in each cluster to the rest of the cells as a whole in a one vs all + comparison. The results are stored as a named list of dataframes. There is + a list element for each cluster containing a dataframe of three variables, + where each sample is a gene. 
\code{logGER} is the log gene expression ratio + calculated by subtracting the mean expression of the gene (see + \link{meanLogX} for mean calculation) in all other cells from the mean + expression of the gene in this cluster. \code{Wstat} and \code{pVal} are + the test statistic and the p-value of the Wilcoxon rank sum test. + \code{FDR} is the false discovery rate-corrected p-value of the test. +} +\description{ +Internal function. See \code{\link{CalcDEvsRest}}. +} +\details{ +Calculates Wilcoxon rank-sum tests for all genes in each one-vs-all +comparison of a cluster vs the rest of the data. You probably don't need to +use this unless you're trying to customize \code{\link{clusterWiseDEtest}}. +} diff --git a/man/fx_calcESvsRest_BP.Rd b/man/fx_calcESvsRest_BP.Rd new file mode 100644 index 0000000..221719d --- /dev/null +++ b/man/fx_calcESvsRest_BP.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deTest.R +\name{fx_calcESvsRest_BP} +\alias{fx_calcESvsRest_BP} +\title{Internal fx to calculate logGER for DEvsRest calculation using BiocParallel} +\usage{ +fx_calcESvsRest_BP(nge, cl, CGS, exponent, pseudocount, DRthresh) +} +\arguments{ +\item{nge}{The log-normalized gene expression matrix.} + +\item{cl}{The factor with cluster assignments per cell (column of nge).} + +\item{CGS}{The output from \code{\link{CalcCGS}}.} + +\item{exponent}{The log base of your normalized input data. Seurat +normalization uses the natural log (set this to exp(1)), while other +normalization methods generally use log2 (set this to 2).} + +\item{pseudocount}{The pseudocount added to all log-normalized values in your +input data. 
Most methods use a pseudocount of 1 to eliminate log(0) errors.} + +\item{DRthresh}{The threshold for minimum detection rate of a gene in the +cluster for the gene to be considered in the following Wilcoxon rank-sum +test.} +} +\value{ +The function returns a list where each list element is the log-ratios + of gene expression when comparing each gene in a cluster to the rest of the + cells as a whole in a one vs all comparison. These logGER tables are + filtered to only include those genes that pass logGER threshold, and thus + the names for each list entry correspond to the genes to test in + \code{\link{fx_calcDEvsRest}}. +} +\description{ +Internal function. See \code{\link{CalcDEvsRest}}. +} +\details{ +Calculates the log-ratios of gene expression for all genes in each one-vs-all +comparison of a cluster vs the rest of the data. This is used to determine +the genes used in DEvsRest calculations. +} diff --git a/man/labelCellTypes.Rd b/man/labelCellTypes.Rd index 30e1c4e..3dda6fc 100644 --- a/man/labelCellTypes.Rd +++ b/man/labelCellTypes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helperFx.R \name{labelCellTypes} \alias{labelCellTypes} -\title{Add predicted cell type names to cluster labels} +\title{scClustViz helper fx: Add predicted cell type names to cluster labels} \usage{ labelCellTypes(sCV, cellMarkers, symbolMap) } diff --git a/man/plot_GEboxplot.Rd b/man/plot_GEboxplot.Rd new file mode 100644 index 0000000..0d61946 --- /dev/null +++ b/man/plot_GEboxplot.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_GEboxplot} +\alias{plot_GEboxplot} +\title{scClustViz plot: Compare gene expression across clusters} +\usage{ +plot_GEboxplot(nge, sCVd, gene, geneName, opts = c("sct", "dr")) +} +\arguments{ +\item{nge}{The gene expression matrix, see \code{\link{getExpr}}.} + +\item{sCVd}{The sCVdata object.} + +\item{gene}{The gene to display.} + +\item{geneName}{Optional.
A named character vector of length one. The element +is the full gene name, and the name is the gene symbol.} + +\item{opts}{Default=\code{c("sct","dr")}. A character vector with plotting +options. If it includes \code{"sct"}, data points will be overlaid as a +jitter over the boxplot. If it includes \code{"dr"}, detection rate per +cluster will be plotted as a small black bar over each boxplot, with the +corresponding axis on the right.} +} +\description{ +This function generates boxplots comparing normalized gene abundance across +all clusters. +} +\examples{ +\dontrun{ +plot_GEboxplot(getExpr(input_data_obj), + sCVd=sCVdata, + gene="Actb") +} + +} diff --git a/man/plot_clustSep.Rd b/man/plot_clustSep.Rd new file mode 100644 index 0000000..cc6236c --- /dev/null +++ b/man/plot_clustSep.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_clustSep} +\alias{plot_clustSep} +\title{scClustViz plot: Cluster separation boxplots} +\usage{ +plot_clustSep(sCVdL, DEtype, FDRthresh = 0.05, res, Xlim, Ylim) +} +\arguments{ +\item{sCVdL}{A named list of sCVdata objects, output of +\code{\link{CalcAllSCV}}.} + +\item{DEtype}{One of "DEneighb", "DEmarker", or "silWidth". "DEneighb" shows +number of significantly differentially expressed genes between nearest +neighbouring clusters. "DEmarker" shows number of marker genes per cluster, +significantly positively differentially expressed genes in all pairwise +comparisons with other clusters. "silWidth" shows silhouette widths with +average silhouette width as a trace across all clustering solutions. (see +\code{\link[cluster]{silhouette}}).} + +\item{FDRthresh}{Default=0.05. The false discovery rate threshold for +determining significance of differential gene expression.} + +\item{res}{Optional. Name of cluster resolution to highlight. Must be one of +\code{names(sCVdL)}.} + +\item{Xlim}{Optional. 
Passed to +\code{\link[graphics]{plot.default}(xlim=Xlim)}.} + +\item{Ylim}{Optional. Passed to +\code{\link[graphics]{plot.default}(ylim=Ylim)}.} +} +\description{ +This function plots metrics of cluster solution cohesion or overfitting as a +function of the number of clusters found. +} +\examples{ +\dontrun{ +plot_clustSep(sCVdL,DEtype="DEneighb",FDRthresh=0.05,res="res.0.8") +} + +} diff --git a/man/plot_clusterGenes_DEgenes.Rd b/man/plot_clusterGenes_DEgenes.Rd new file mode 100644 index 0000000..9299941 --- /dev/null +++ b/man/plot_clusterGenes_DEgenes.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_clusterGenes_DEgenes} +\alias{plot_clusterGenes_DEgenes} +\title{scClustViz plot: Plot within-cluster gene expression highlighting DE genes} +\usage{ +plot_clusterGenes_DEgenes(sCVd, selClust, DEgenes, DEnum, DEtype) +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{selClust}{A named character vector representing the cluster to be +displayed. If \code{\link{labelCellTypes}} has been run, pass the +appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +argument to show both cluster number and cell type label in the legend.} + +\item{DEgenes}{The output of \code{\link{dotplotDEgenes}}.} + +\item{DEnum}{Single integer representing the maximum number of DE genes per +cluster to include in the plot.} + +\item{DEtype}{One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}}; +\code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see +\code{\link{DEmarker}}.} +} +\description{ +This function makes a scatterplot of gene detection rate vs. mean detected +gene abundance, highlighting differentially expressed genes. 
+} +\examples{ +\dontrun{ +pdf("filepath.pdf",width=12,height=7) +plot_clusterGenes_DEgenes(sCVd=sCVdata, + selClust="1", + DEgenes=dotplotDEgenes(sCVdata, + DEtype="DEneighb", + FDRthresh=0.01), + DEnum=5, + DEtype="DEneighb") +dev.off() +} + +} diff --git a/man/plot_clusterGenes_markers.Rd b/man/plot_clusterGenes_markers.Rd new file mode 100644 index 0000000..f26d664 --- /dev/null +++ b/man/plot_clusterGenes_markers.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_clusterGenes_markers} +\alias{plot_clusterGenes_markers} +\title{scClustViz plot: Plot within-cluster gene expression highlighting marker +genes} +\usage{ +plot_clusterGenes_markers(sCVd, selClust, cellMarkersS, cellMarkersU) +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{selClust}{A named character vector representing the cluster to be +displayed. If \code{\link{labelCellTypes}} has been run, pass the +appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +argument to show both cluster number and cell type label in the legend.} + +\item{cellMarkersS}{Derived from the \code{cellMarkers} argument to +\code{\link{runShiny}}. A list of the gene symbols common to two or more +cell types in \code{cellMarkers}. Each entry is named for the indices of +\code{cellMarkers} that share the gene.} + +\item{cellMarkersU}{Derived from the \code{cellMarkers} argument to +\code{\link{runShiny}}. A list of the unique gene symbols for each cell +type in \code{cellMarkers}.} +} +\description{ +This function makes a scatterplot of gene detection rate vs. mean detected +gene abundance, highlighting genes identified as cell type specific markers +by the user.
\strong{This function will not work unless +\code{\link{addCellMarkersToCGS}} has been run on the sCVdata object prior.} +} +\examples{ +\dontrun{ +cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6", + "Pcna","Nes","Cux1","Cux2"), + "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6", + "Tubb3","Rbfox3","Dcx"), + "Cajal-Retzius neurons"="Reln", + "Intermediate progenitors"="Eomes", + "Projection neurons"=c("Tbr1","Satb2","Fezf2", + "Bcl11b","Tle4","Nes", + "Cux1","Cux2","Tubb3", + "Rbfox3","Dcx") + ) +cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2, + function(X) do.call(intersect,unname(cellMarkers[X]))) +try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2, + function(X) paste(X,collapse="&")),silent=T) +cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] +cellMarkersU <- lapply(cellMarkers,function(X) X[!X \%in\% unlist(cellMarkersS)]) +sCVdata <- addCellMarkersToCGS(sCVdata, + cellMarkersU=cellMarkersU, + cellMarkersS=cellMarkersS, + symbolMap=NULL) + +pdf("filepath.pdf",width=12,height=7) +plot_clusterGenes_markers(sCVd=sCVdata, + selClust="1", + cellMarkersS=cellMarkersS, + cellMarkersU=cellMarkersU) +dev.off() +} + +} diff --git a/man/plot_clusterGenes_search.Rd b/man/plot_clusterGenes_search.Rd new file mode 100644 index 0000000..20687f7 --- /dev/null +++ b/man/plot_clusterGenes_search.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_clusterGenes_search} +\alias{plot_clusterGenes_search} +\title{scClustViz plot: Plot within-cluster gene expression highlighting selected genes} +\usage{ +plot_clusterGenes_search(sCVd, selClust, GOI) +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{selClust}{A named character vector representing the cluster to be +displayed.
If \code{\link{labelCellTypes}} has been run, pass the +appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this +argument to show both cluster number and cell type label in the legend.} + +\item{GOI}{A character vector of gene names to highlight.} +} +\description{ +This function makes a scatterplot of gene detection rate vs. mean detected +gene abundance, highlighting specified genes. +} +\examples{ +\dontrun{ +pdf("filepath.pdf",width=12,height=7) +plot_clusterGenes_search(sCVd=sCVdata, + selClust="1", + GOI=c("Actb","Sox2")) +dev.off() +} + +} diff --git a/man/plot_compareClusts.Rd b/man/plot_compareClusts.Rd new file mode 100644 index 0000000..b4da988 --- /dev/null +++ b/man/plot_compareClusts.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_compareClusts} +\alias{plot_compareClusts} +\title{scClustViz plot: Volcano and MA-style plots to compare clusters} +\usage{ +plot_compareClusts(sCVd, clA, clB, dataType, labType = "de", labGenes, + labNum = 5, labTypeDiff = "logGER") +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{clA}{Cluster identifier for side A of the comparison.} + +\item{clB}{Cluster identifier for side B of the comparison.} + +\item{dataType}{For MA-style plots comparing difference and mean of gene +summary statistics, one of: \code{"DR"} (detection rate); \code{"MGE"} +(mean gene expression); \code{"MDGE"} (mean detected gene expression). For +volcano plots, the effect size measure can be one of: \code{"dDR"} +(difference in detection rate); \code{"logGER"} (log gene expression +ratio). To compare relationship between difference in detection rate and +log gene expression ratio, use \code{"GERvDDR"}.} + +\item{labType}{Default="de". A character vector indicating which genes to +highlight. 
One of \code{"de"} (most statistically significant genes), +\code{"diff"} (most different by dataType shown), or \code{"search"} +(specified genes).} + +\item{labGenes}{Only required if \code{labType="search"}. Gene names to +highlight.} + +\item{labNum}{Default=5. Number of genes to highlight per side.} + +\item{labTypeDiff}{Default="logGER". Only required if +\code{dataType="GERvDDR"} and \code{labType="diff"}. Which axis to use for +difference calculation. One of \code{"dDR"} (difference in detection rate) +or \code{"logGER"} (log gene expression ratio).} +} +\description{ +This function generates scatterplots inspired by volcano and MA plots for +comparing gene expression between pairs of clusters. +} +\examples{ +\dontrun{ +plot_compareClusts(sCVdata, + clA="1", + clB="2", + dataType="GERvDDR", + labType="search", + labGenes="Actb") +} + +} diff --git a/man/plot_deDotplot.Rd b/man/plot_deDotplot.Rd new file mode 100644 index 0000000..f26689a --- /dev/null +++ b/man/plot_deDotplot.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_deDotplot} +\alias{plot_deDotplot} +\title{scClustViz plot: Plot gene expression dotplots.} +\usage{ +plot_deDotplot(sCVd, DEgenes, DEnum) +} +\arguments{ +\item{sCVd}{The sCVdata object.} + +\item{DEgenes}{The output of \code{\link{dotplotDEgenes}}.} + +\item{DEnum}{Single integer representing the maximum number of DE genes per +cluster to include in the dotplot.} +} +\description{ +This function makes dotplots (a heatmap analogue) showing gene expression for +a set of genes across all clusters. +} +\details{ +When generated in an interactive context (i.e. RStudio), this can sometimes +result in a \code{figure margins too large} error. See example for suggested +dimensions of the graphic device. 
+} +\examples{ +\dontrun{ +pdf("filepath.pdf",width=11,height=7) +plot_deDotplot(sCVd=sCVdata, + DEgenes=dotplotDEgenes(sCVdata, + DEtype="DEneighb", + FDRthresh=0.01), + DEnum=5) +dev.off() +} + +} diff --git a/man/plot_mdCompare.Rd b/man/plot_mdCompare.Rd new file mode 100644 index 0000000..2499bcb --- /dev/null +++ b/man/plot_mdCompare.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_mdCompare} +\alias{plot_mdCompare} +\title{scClustViz plot: Plot to compare cell metadata} +\usage{ +plot_mdCompare(MD, mdX, mdY, sel_cells, sel_clust, md_log) +} +\arguments{ +\item{MD}{A dataframe of cellular metadata. See \code{\link{getMD}}.} + +\item{mdX}{A character vector of one referring to the variable name from +\code{MD} to plot on the x-axis.} + +\item{mdY}{A character vector of one referring to the variable name from +\code{MD} to plot on the y-axis.} + +\item{sel_cells}{Optional. A character vector of cell names (rownames of +\code{MD}) to highlight in the plot.} + +\item{sel_clust}{Optional. The name of the selected cluster +(\code{sel_cells}) to include in the legend. If +\code{\link{labelCellTypes}} has been run, pass the appropriate element of +\code{attr(Clusters(sCV),"ClusterNames")} to this argument to show both +cluster number and cell type label in the legend.} + +\item{md_log}{Optional. A character vector indicating which axes should be +log scaled. \code{c("x","y")} to log-scale both axes.} +} +\description{ +This function makes scatter/boxplots comparing cellular metadata.
+} +\examples{ +\dontrun{ +plot_mdCompare(MD=getMD(input_data_obj), + mdX="total_counts", + mdY="total_features", + sel_cells=names(Clusters(sCVdata))[Clusters(sCVdata) == "1"], + sel_clust="1", + md_log="x") +} + +} diff --git a/man/plot_mdPerClust.Rd b/man/plot_mdPerClust.Rd new file mode 100644 index 0000000..fb24256 --- /dev/null +++ b/man/plot_mdPerClust.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_mdPerClust} +\alias{plot_mdPerClust} +\title{scClustViz plot: Plot to view cellular metadata by cluster} +\usage{ +plot_mdPerClust(MD, sel, cl, opt = "absolute") +} +\arguments{ +\item{MD}{A dataframe of cellular metadata. See \code{\link{getMD}}.} + +\item{sel}{A character vector of one referring to the variable name from +\code{MD} to plot.} + +\item{cl}{A factor of cluster assignments. See \code{\link{Clusters}}.} + +\item{opt}{Default="absolute". A character vector of plotting options. One of +\code{"absolute"}, \code{"relative"}, or \code{"y"}. \code{"y"} +log-scales the data for positive numerical metadata. For categorical +metadata, \code{"absolute"} plots a stacked barplot of raw counts, whereas +\code{"relative"} plots the proportion of each cluster represented by each +category.} +} +\description{ +This function makes boxplots / stacked barplots of cellular metadata +separated by cluster.
+} +\examples{ +\dontrun{ +plot_mdPerClust(MD=getMD(input_data_obj), + sel="cyclonePhases", + cl=Clusters(sCVdata), + opt="relative") +} + +} diff --git a/man/plot_sil.Rd b/man/plot_sil.Rd new file mode 100644 index 0000000..c9b49c5 --- /dev/null +++ b/man/plot_sil.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_sil} +\alias{plot_sil} +\title{scClustViz plot: Silhouette plot} +\usage{ +plot_sil(sCVd) +} +\arguments{ +\item{sCVd}{An \code{\link{sCVdata}} object with a non-null \code{Silhouette} +slot.} +} +\description{ +This function is a wrapper to \code{plot(silhouette(x))}. +} diff --git a/man/plot_tsne.Rd b/man/plot_tsne.Rd new file mode 100644 index 0000000..3cbff63 --- /dev/null +++ b/man/plot_tsne.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{plot_tsne} +\alias{plot_tsne} +\title{scClustViz plot: Plot cell embedding in 2D} +\usage{ +plot_tsne(cell_coord, md, md_title, md_log = F, label = NULL, + sel_cells, sel_cells_A, sel_cells_B) +} +\arguments{ +\item{cell_coord}{A numeric matrix where named rows are cells, and two +columns are the x and y dimensions of the cell embedding.} + +\item{md}{The overlay information. Either a factor or numeric vector matching +the rows (cells) of the \code{cell_coord} matrix. If this is a factor, the +cells will be coloured by the factor levels. If a numeric vector, the cells +will be coloured using the \code{\link[viridis]{viridis}} colourscale.} + +\item{md_title}{NULL or a character vector of one. If NULL, \code{md} is +assumed to be cluster assignments. Otherwise this should be the title of +the overlay represented by \code{md}.} + +\item{md_log}{Default=FALSE. Logical vector of length one indicating whether +\code{md} should be log-transformed. Only to be used when \code{md} is +numeric.} + +\item{label}{Default=NULL. 
The output of \code{\link{tsne_labels}} to have +cluster names overlaid on the plot.} + +\item{sel_cells}{Optional. A character vector of cell names (rownames of +\code{cell_coord}) to highlight in the plot.} + +\item{sel_cells_A}{Optional. Alternative highlighting method to sel_cells, +can be used in conjunction. Meant for indicating a selected set of cells +when building manual cell set comparisons, in conjunction with +\code{sel_cells_B}.} + +\item{sel_cells_B}{Optional. See \code{sel_cells_A}.} +} +\description{ +This function plots cells in two dimensions, with various overlays. +} +\examples{ +\dontrun{ +# Cluster overlay: +plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), + md=Clusters(sCVdata), + md_title=NULL, + label=tsne_labels(sCVd=sCVdata, + cell_coord=getEmb(input_data_obj,"tsne"), + lab_type="ClusterNames")) + +# Metadata overlay: +plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), + md=getMD(input_data_obj)$total_counts, + md_title="Library Size", + md_log=TRUE, + label=tsne_labels(sCVd=sCVdata, + cell_coord=getEmb(input_data_obj,"tsne"), + lab_type="ClusterNames")) + +# Gene expression overlay: +plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"), + md=getExpr(input_data_obj,Param(sCVdata,"assayType"))["Actb",], + md_title="Actb") +} + +} diff --git a/man/runShiny.Rd b/man/runShiny.Rd index 4971933..ae120dd 100644 --- a/man/runShiny.Rd +++ b/man/runShiny.Rd @@ -4,7 +4,8 @@ \alias{runShiny} \title{Run the scClustViz Shiny app} \usage{ -runShiny(filePath, outPath, cellMarkers, annotationDB, rownameKeytype, ...) +runShiny(filePath, outPath, cellMarkers, annotationDB, rownameKeytype, + imageFileType = "pdf", ...) } \arguments{ \item{filePath}{A character vector giving the relative filepath to an RData @@ -41,6 +42,12 @@ function will assume the rownames are official gene symbols. 
If less than 80% of rownames map to official gene symbols, the function will try to predict the appropriate keytype of the rownames (this takes a bit of time).} +\item{imageFileType}{Default="pdf". The file format for saved figures. One of +\code{"pdf"} (generated with \code{\link[grDevices]{cairo_pdf}}), +\code{"eps"} (generated with \code{\link[grDevices]{cairo_ps}}), +\code{"tiff"} (generated with \code{\link[grDevices]{tiff}}), or +\code{"png"} (generated with \code{\link[grDevices]{png}}).} + \item{...}{Named options that should be passed to the \code{\link[shiny]{runApp}} call (these can be any of the following: "port", "launch.browser", "host", "quiet", "display.mode" and "test.mode").} diff --git a/man/tsne_labels.Rd b/man/tsne_labels.Rd new file mode 100644 index 0000000..9bb93c1 --- /dev/null +++ b/man/tsne_labels.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shinyModules.R +\name{tsne_labels} +\alias{tsne_labels} +\title{scClustViz plot element: Cluster names on cluster centroid.} +\usage{ +tsne_labels(sCVd, cell_coord, lab_type) +} +\arguments{ +\item{sCVd}{An sCVdata object.} + +\item{cell_coord}{A numeric matrix where named rows are cells, and two +columns are the x and y dimensions of the cell embedding.} + +\item{lab_type}{One of "ClusterNames", "ClusterNamesAll", or "Clusters". +"ClusterNames" places cluster names (added to sCVdata object by +\code{\link{labelCellTypes}}) at the centroid of all points sharing that +cluster name (can span clusters). "ClusterNamesAll" places cluster names at +the centroid of each cluster. "Clusters" places cluster ID +(\code{levels(Clusters(sCVd))}) at the centroid of each cluster.} +} +\description{ +See \code{\link{plot_tsne}} for application. +}