diff --git a/DESCRIPTION b/DESCRIPTION index ff9c467..29ee9db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,10 @@ Package: scClustViz Type: Package Title: Differential Expression-based scRNAseq Cluster Assessment and Viewing -Version: 0.1.0 -Authors@R: c(as.person("Brendan T Innes [aut,cre]"), - as.person("Gary D Bader [aut,ths]")) +Version: 0.2.0 +Date: 2018-08-16 +Authors@R: c(as.person("Brendan T. Innes [aut,cre]"), + as.person("Gary D. Bader [aut,ths]")) Description: An interactive R Shiny tool for visualizing single-cell RNAseq clustering results from the Seurat R package or any other analysis pipeline. Its main goal is two-fold: A: to help select a biologically appropriate resolution or K from clustering diff --git a/R/deTest.R b/R/deTest.R index 8a2f4f1..06838a9 100644 --- a/R/deTest.R +++ b/R/deTest.R @@ -68,7 +68,7 @@ #' file="for_scClustViz.RData") #' # Save these objects so you'll never have to run this slow function again! #' -#' runShiny(filePath="for_scClustViz.RData",annotationDB=org.Mm.eg.db) +#' runShiny(filePath="for_scClustViz.RData") #' } #' #' @seealso \code{\link{readFromSeurat}} or \code{\link{readFromManual}} for reading in @@ -83,6 +83,8 @@ clusterWiseDEtest <- function(il,testAll=TRUE, exponent=2,pseudocount=1,FDRthresh=0.01, threshType="dDR",dDRthresh=0.15,logGERthresh=1) { + temp_warn <- options("warn") + options(warn=-1) out <- list(CGS=list(),deTissue=list(),deVS=list(), deMarker=list(),deDist=list(),deNeighb=list()) @@ -204,8 +206,6 @@ clusterWiseDEtest <- function(il,testAll=TRUE, if (min(sapply(out[["deNeighb"]][[res]],nrow)) < 1) { break } } } + options(warn=temp_warn$warn) return(out) } - -#### removing cluster solutions not tested above. #### -# il[["cl"]] <- il[["cl"]][names(out[["deNeighb"]])] diff --git a/R/importData.R b/R/importData.R index 650fd24..81474f1 100644 --- a/R/importData.R +++ b/R/importData.R @@ -39,11 +39,20 @@ #' specify the desired cell embeddings.} #' } #' -#' @examples +#' @examples #' \dontrun{ -#' data_for_scClustViz <- readFromSeurat(your_seurat_object) -#' rm(your_seurat_object) +#' data_for_scClustViz <- readFromSeurat(your_seurat_object, +#' convertGeneIDs=F) +#' rm(your_seurat_object) #' # All the data scClustViz needs is in 'data_for_scClustViz'. +#' +#' DE_for_scClustViz <- clusterWiseDEtest(data_for_scClustViz) +#' +#' save(data_for_scClustViz,DE_for_scClustViz, +#' file="for_scClustViz.RData") +#' # Save these objects so you'll never have to run this slow function again! +#' +#' runShiny(filePath="for_scClustViz.RData") #' } #' #' @family importData functions diff --git a/R/runViz.R b/R/runViz.R index 2803713..71982e6 100644 --- a/R/runViz.R +++ b/R/runViz.R @@ -1,82 +1,99 @@ #' Run the scClustViz Shiny app #' -#' Performs differential expression testing between clusters for all cluster solutions in -#' order to assess the biological relevance of each cluster solution. Differential -#' expression testing is done using the Wilcoxon rank-sum test implemented in the base R -#' \code{stats} package. For details about what is being compared in the tests, see the -#' "Value" section. +#' Performs differential expression testing between clusters for all cluster +#' solutions in order to assess the biological relevance of each cluster +#' solution. Differential expression testing is done using the Wilcoxon rank-sum +#' test implemented in the base R \code{stats} package. For details about what +#' is being compared in the tests, see the "Value" section. #' -#' @param filePath A character vector giving the relative filepath to an RData file -#' containing two objects. One must be the list outputted by one of the importData -#' functions (either \code{\link{readFromSeurat}} or \code{\link{readFromManual}}) -#' containing the data for viewing in the app. The other must be the list outputted by -#' the \code{\link{clusterWiseDEtest}} function containing differential gene expression -#' results for viewing in the app. As long as none of the name of the list elements have -#' been changed, the objects can be named anything you'd like. Note that any files -#' generated by the Shiny app (ie. saving the selected cluster solution, saving custom -#' set DE testing results) will be saved/loaded in the same directory as the input file. +#' @param filePath A character vector giving the relative filepath to an RData +#' file containing two objects. One must be the list outputted by one of the +#' importData functions (either \code{\link{readFromSeurat}} or +#' \code{\link{readFromManual}}) containing the data for viewing in the app. +#' The other must be the list outputted by the \code{\link{clusterWiseDEtest}} +#' function containing differential gene expression results for viewing in the +#' app. As long as none of the name of the list elements have been changed, +#' the objects can be named anything you'd like. Note that any files generated +#' by the Shiny app (ie. saving the selected cluster solution, saving custom +#' set DE testing results) will be saved/loaded in the same directory as the +#' input file. #' -#' @param cellMarkers Optional. If you have canonical marker genes for expected cell -#' types, list them here (see example code below). Note that the gene names must match -#' rownames of your data (ie. use ensembl IDs if your gene expression matrix rownames -#' are ensembl IDs). The Shiny app will attempt to label clusters in the tSNE projection -#' by highest median gene expression. +#' @param outPath Optional. If you'd like to save/load any analysis files +#' to/from a different directory than the input directory (for example, if +#' you're using data from a package), specify that directory here. #' -#' @param annotationDB Optional. If the gene IDs in your data aren't official gene -#' symbols, you may want to pass an AnnotationDbi object (ie. org.Mm.eg.db / -#' org.Hs.eg.db for mouse / human respectively) as this argument. This will allow the -#' Shiny interface to both display gene symbols when highlighting genes in figures, and -#' search by gene symbol. +#' @param cellMarkers Optional. If you have canonical marker genes for expected +#' cell types, list them here (see example code below). Note that the gene +#' names must match rownames of your data (ie. use ensembl IDs if your gene +#' expression matrix rownames are ensembl IDs). The Shiny app will attempt to +#' label clusters in the tSNE projection by highest median gene expression. #' -#' @param rownameKeytype Optional. If passing the annotationDB argument, it helps if you -#' indicate what keytype (see \code{AnnotationDbi::keytypes(annotationDB)}) the rownames -#' of your data are. If this is missing, it will be determined automatically, but that -#' will take about 30s. +#' @param annotationDB Optional. An AnnotationDbi object for your data's species +#' (ie. org.Mm.eg.db / org.Hs.eg.db for mouse / human respectively). If +#' present, gene names will be shown in gene-specific figures, official gene +#' symbols (instead of your rownames) will be displayed in figures, and gene +#' searches performed using both official gene symbols and your rownames. If +#' the gene IDs in your data aren't official gene symbols, using this argument +#' will make the visualization tool much more useful. #' -#' @param exponent The log base of your normalized input data. Seurat normalization uses -#' the natural log (set this to exp(1)), while other normalization methods generally use -#' log2 (set this to 2). This is used if you use the function for testing differential -#' gene expression between custom sets, and should be set to the same parameters as in -#' \code{clusterWiseDEtest}. -#' -#' @param pseudocount The pseudocount added to all log-normalized values in your input -#' data. Most methods use a pseudocount of 1 to eliminate log(0) errors. This is used if -#' you use the function for testing differential gene expression between custom sets, -#' and should be set to the same parameters as in \code{clusterWiseDEtest}. +#' @param rownameKeytype Optional. A character vector indicating the +#' AnnotationDbi keytype (see \code{AnnotationDbi::keytypes(annotationDB)}) +#' that represents your rownames. If the annotationDB argument is present and +#' this is missing, the function will assume the rownames are official gene +#' symbols. If less than 80% of rownames map to official gene symbols, the +#' function will try to predict the appropriate keytype of the rownames (this +#' takes a bit of time). #' -#' @param FDRthresh The false discovery rate to use as a threshold for determining -#' statistical significance of differential expression calculated by the Wilcoxon -#' rank-sum test. This is used if you use the function for testing differential gene -#' expression between custom sets, and should be set to the same parameters as in +#' @param exponent Default = 2. The log base of your normalized input data. +#' Seurat normalization uses the natural log (set this to exp(1)), while other +#' normalization methods generally use log2 (set this to 2). This is used if +#' you use the function for testing differential gene expression between +#' custom sets, and should be set to the same parameters as in #' \code{clusterWiseDEtest}. #' -#' @param threshType Filtering genes for use in differential expression testing can be -#' done multiple ways. We use an expression ratio filter for comparing each cluster to -#' the rest of the tissue as a whole, but find that difference in detection rates works -#' better when comparing clusters to each other. You can set threshType to -#' \code{"logGER"} to use a gene expression ratio for all gene filtering, or leave it as -#' default (\code{"dDR"}) to use difference in detection rate as the thresholding method -#' when comparing clusters to each other. This is used if you use the function for -#' testing differential gene expression between custom sets, and should be set to the +#' @param pseudocount Default = 1. The pseudocount added to all log-normalized +#' values in your input data. Most methods use a pseudocount of 1 to eliminate +#' log(0) errors. This is used if you use the function for testing +#' differential gene expression between custom sets, and should be set to the #' same parameters as in \code{clusterWiseDEtest}. #' -#' @param dDRthresh Magnitude of detection rate difference of a gene between clusters to -#' use as filter for determining which genes to test for differential expression between -#' clusters. This is used if you use the function for testing differential gene -#' expression between custom sets, and should be set to the same parameters as in +#' @param FDRthresh Default = 0.01. The false discovery rate to use as a +#' threshold for determining statistical significance of differential +#' expression calculated by the Wilcoxon rank-sum test. This is used if you +#' use the function for testing differential gene expression between custom +#' sets, and should be set to the same parameters as in #' \code{clusterWiseDEtest}. #' -#' @param logGERthresh Magnitude of gene expression ratio for a gene between clusters to -#' use as filter for determining which genes to test for differential expression between -#' clusters. This is used if you use the function for testing differential gene -#' expression between custom sets, and should be set to the same parameters as in -#' \code{clusterWiseDEtest}. +#' @param threshType Default = "dDR". Filtering genes for use in differential +#' expression testing can be done multiple ways. We use an expression ratio +#' filter for comparing each cluster to the rest of the tissue as a whole, but +#' find that difference in detection rates works better when comparing +#' clusters to each other. You can set threshType to \code{"logGER"} to use a +#' gene expression ratio for all gene filtering, or leave it as default +#' (\code{"dDR"}) to use difference in detection rate as the thresholding +#' method when comparing clusters to each other. This is used if you use the +#' function for testing differential gene expression between custom sets, and +#' should be set to the same parameters as in \code{clusterWiseDEtest}. +#' +#' @param dDRthresh Default = 0.15. Magnitude of detection rate difference of a +#' gene between clusters to use as filter for determining which genes to test +#' for differential expression between clusters. This is used if you use the +#' function for testing differential gene expression between custom sets, and +#' should be set to the same parameters as in \code{clusterWiseDEtest}. #' -#' @return The function causes the scClustViz Shiny GUI app to open in a seperate window. +#' @param logGERthresh Default = 1. Magnitude of gene expression ratio for a +#' gene between clusters to use as filter for determining which genes to test +#' for differential expression between clusters. This is used if you use the +#' function for testing differential gene expression between custom sets, and +#' should be set to the same parameters as in \code{clusterWiseDEtest}. +#' +#' @return The function causes the scClustViz Shiny GUI app to open in a +#' seperate window. #' #' @examples #' \dontrun{ -#' data_for_scClustViz <- readFromSeurat(your_seurat_object) +#' data_for_scClustViz <- readFromSeurat(your_seurat_object, +#' convertGeneIDs=F) #' rm(your_seurat_object) #' # All the data scClustViz needs is in 'data_for_scClustViz'. #' @@ -86,19 +103,32 @@ #' file="for_scClustViz.RData") #' # Save these objects so you'll never have to run this slow function again! #' -#' runShiny(filePath="for_scClustViz.RData", -#' cellMarkers=list( -#' "Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna", -#' "Nes","Cux1","Cux2"), -#' "Interneurons"=c("Gad1","Gad2","Npy","Sst", -#' "Lhx6","Tubb3","Rbfox3","Dcx"), -#' "Cajal-Retzius neurons"="Reln", -#' "Intermediate progenitors"="Eomes", -#' "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b", -#' "Tle4","Nes","Cux1","Cux2", -#' "Tubb3","Rbfox3","Dcx") +#' runShiny(filePath="for_scClustViz.RData") +#' +#' ### Using example data from the MouseCortex package ### +#' devtools::install_github("BaderLab/MouseCortex") +#' library(org.Mm.eg.db) +#' runShiny(system.file("e13cortical_forViz.RData",package="MouseCortex"), +#' # Load input file (E13.5 data) from package directory. +#' outPath=".", +#' # Save any further analysis performed in the app to the +#' # working directory rather than library directory. +#' annotationDB="org.Mm.eg.db", +#' # This is an optional argument, but will add annotations. +#' cellMarkers=list("Cortical precursors"=c("Mki67","Sox2","Pax6", +#' "Pcna","Nes","Cux1","Cux2"), +#' "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6", +#' "Tubb3","Rbfox3","Dcx"), +#' "Cajal-Retzius neurons"="Reln", +#' "Intermediate progenitors"="Eomes", +#' "Projection neurons"=c("Tbr1","Satb2","Fezf2", +#' "Bcl11b","Tle4","Nes", +#' "Cux1","Cux2","Tubb3", +#' "Rbfox3","Dcx") +#' ) +#' # This is a list of canonical marker genes per expected cell type. +#' # The app uses this list to automatically annotate clusters. #' ) -#' ) #' } #' #' @seealso \code{\link{readFromSeurat}} or \code{\link{readFromManual}} for reading in @@ -112,7 +142,8 @@ #' #' @export -runShiny <- function(filePath,cellMarkers=list(), +runShiny <- function(filePath,outPath, + cellMarkers=list(), annotationDB,rownameKeytype, exponent=2,pseudocount=1,FDRthresh=0.01, threshType="dDR",dDRthresh=0.15,logGERthresh=1) { @@ -149,10 +180,18 @@ runShiny <- function(filePath,cellMarkers=list(), # needed in the Shiny app, and saves the objects in the function environment # under the names the shiny app expects. + cl <- cl[names(deNeighb)] + # Ensures that only clusters that were tested for differential expression are + # displayed. This prevents a whole pile of errors. + # ^^ dataPath & dataTitle -------------------------------------------------------------- temp_dataPath <- strsplit(filePath,"/|\\\\") - dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",filePath) - if (dataPath == "") { dataPath <- "./" } + if (missing(outPath)) { + dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",filePath) + if (dataPath == "") { dataPath <- "./" } + } else { + dataPath <- outPath + } dataTitle <- sub("\\.[^.]+$","",tail(temp_dataPath[[1]],1)) rm(temp_dataPath) # Seperates the file name (which becomes the dataTitle) from the path (which @@ -197,6 +236,10 @@ runShiny <- function(filePath,cellMarkers=list(), annotationDB <- get(annotationDB) } if (missing(rownameKeytype)) { + rownameKeytype <- "SYMBOL" + } + if (sum(rownames(nge) %in% keys(org.Mm.eg.db,rownameKeytype)) / nrow(nge) < 0.8) { + print("Less than 80% of rownames map to official gene symbols.") print("Automatically determining keytype from rownames...") temp_keyMatch <- pbapply::pbsapply(AnnotationDbi::keytypes(annotationDB),function(X) sum(rownames(nge) %in% AnnotationDbi::keys(annotationDB,X))) @@ -204,7 +247,9 @@ runShiny <- function(filePath,cellMarkers=list(), print(paste0("Keytype '",rownameKeytype,"' matched ", max(temp_keyMatch),"/",nrow(nge)," rownames.")) } - symbolMap <- mapIds(annotationDB,rownames(nge),"SYMBOL",rownameKeytype,multiVals="first") + if (rownameKeytype != "SYMBOL") { + symbolMap <- mapIds(annotationDB,rownames(nge),"SYMBOL",rownameKeytype,multiVals="first") + } } # ^ Cell type annotation from cellMarkers (and other quick calculations for Shiny) ----- @@ -1561,11 +1606,13 @@ runShiny <- function(filePath,cellMarkers=list(), xlab=NA,xaxt="n")) mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(annotationDB,keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) + try(tempGeneName <- mapIds(annotationDB,keys=input$cgGene,keytype=rownameKeytype, + column="GENENAME",multiVals="first"),silent=T) if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), + mtext(paste(paste0(names(tempGeneName),": ",tempGeneName),collapse="\n"), side=1,line=2,font=2) + } else { + mtext(paste(input$cgGene,collapse="\n"),side=1,line=2,font=2) } if ("sct" %in% input$bxpOpts) { bxpCol <- alpha(clustCols(res()),.2) @@ -1887,6 +1934,9 @@ runShiny <- function(filePath,cellMarkers=list(), } else { output$calcText <- renderText("") withProgress({ + temp_warn <- options("warn") + options(warn=-1) + temp <- rep("Unselected",nrow(d$cl)) names(temp) <- rownames(d$cl) temp[selectedSets$a] <- "Set A" @@ -1999,6 +2049,7 @@ runShiny <- function(filePath,cellMarkers=list(), d$deMarker[[newRes]][["Set B"]]$logGER <- d$deMarker[[newRes]][["Set B"]]$logGER * -1 selectedSets$a <- selectedSets$b <- NULL + options(warn=temp_warn$warn) },message="DE calculations:") res(newRes) # Automatically update the view to show the calculated results. @@ -2117,13 +2168,20 @@ runShiny <- function(filePath,cellMarkers=list(), quantile(range(dr_viz[,1]),.95)), y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- - select(annotationDB,keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) + try(tempGeneName <- mapIds(annotationDB,keys=goi,keytype=rownameKeytype, + column="GENENAME",multiVals="first"),silent=T) if (exists("tempGeneName")) { if (length(tempGeneName) > 4) { tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] } + tempGeneName[is.na(tempGeneName)] <- names(tempGeneName)[is.na(tempGeneName)] title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) + } else { + temp_goi <- goi + if (length(temp_goi) > 4) { + temp_goi[5] <- "and more..."; temp_goi <- temp_goi[1:5] + } + title(paste(temp_goi,collapse="\n"),line=0.25,adj=.01,font.main=1) } } } diff --git a/ToBeConvertedToPkg/README.md b/README.md similarity index 100% rename from ToBeConvertedToPkg/README.md rename to README.md diff --git a/ToBeConvertedToPkg/PrepareInputs.R b/ToBeConvertedToPkg/PrepareInputs.R deleted file mode 100644 index 833f851..0000000 --- a/ToBeConvertedToPkg/PrepareInputs.R +++ /dev/null @@ -1,278 +0,0 @@ -library(pbapply) -library(cluster) - -######## User-defined variables ######## -exponent <- 2 -## ^ log base of your normalized input data. -## Seurat defaults to natural log (set this to exp(1)), -## other methods are generally log2 (set this to 2). -pseudocount <- 1 -## ^ pseudocount added to all log-normalized values in your input data. -## Most methods use a pseudocount of 1 to eliminate log(0) errors. - -#threshType <- "logGER" # use an expression ratio-based threshold for filtering genes prior to DE testing -threshType <- "dDR" # use a difference in detection rate threshold for filtering -## Filtering genes for use in differential expression testing can be done multiple ways. -## We use an expression ratio filter for comparing each cluster to the rest of the tissue as a whole, -## but find that difference in detection rates works better when comparing clusters to each other. -## You can set threshType to "logGER" to use fold-change for all gene filtering if you'd prefer. - -logGERthresh <- 1 # magnitude of mean log-expression fold change between clusters to use as filter. -dDRthresh <- 0.15 # magnitude of detection rate difference between clusters to use as filter. -WRSTalpha <- 0.01 # significance level for DE testing using Wilcoxon rank sum test - -#dataRDS <- "../scClustViz_files/testData.rds" -## ^ path to input data object, saved as RDS (use saveRDS() to generate). -dataRData <- "../scClustViz_files/e17_Cortical_Only.RData" -## ^ path to input data, saved as RData (use save() to generate ) -outputDirectory <- "meCortex/e17/" -## ^ path to output directory with trailing slash (for loading into the R Shiny visualization script) -if (!dir.exists(outputDirectory)) { dir.create(outputDirectory) } - -convertGeneIDs <- FALSE ## Set to TRUE if your gene names aren't official gene symbols. -## If converting gene IDs, set the following: -geneRowNames <- "ensembl_gene_id" -## ^ Set to the biomaRt descriptor for the current gene name IDs. -## Run listAttri -speciesSymbol <- "mgi_symbol" ## Gene IDs will be converted to MGI symbols if input is mouse data -#speciesSymbol <- "hgnc_symbol" ## Gene IDs will be converted to HGNC symbols if input is human data - - -######## Functions ######## -meanLogX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -## ^ Adding a pseudocount of 1 to the logMean prior to logGER calculations skews the result quite dramatically, -## so instead we add a small pseudocount to avoid +/- inf results when means are zero, without the same skewing. -## Adding a very small (ie 1e-99) number means that means of zero get set to a large negative log-mean, -## when it might be more appropriate to have those values fall closer to the smallest non-zero log-mean. -## By using a pseudocount of 1 / number of samples, we ensure that log(zero) is smaller than any non-zero log-mean, -## while still being in the same ballpark. -rainbow2 <- function(n) { - hues = seq(15, 375, length = n + 1) - hcl(h = hues, l = 60, c = 100)[1:n] -} - -######## Build DE sets for all resolutions ######## - -if (exists("dataRDS")) { - inD <- readRDS(dataRDS) -} else if (exists("dataRData")) { - temp <- load(dataRData) - inD <- get(temp) - ## If you have multiple objects saved in this file, set inD to your data object. - ## i.e. inD <- get("mySeuratDataObject") - rm(list=c(temp,"temp")) -} else { warning("Set path to input data as dataRDS or dataRData") } - -if (class(inD) == "seurat") { - require(Seurat) - inD <- UpdateSeuratObject(inD) ## In case your Seurat object is from an older version of Seurat - - nge <- inD@data - ## ^ normalized gene expression matrix (matrix: genes x cells) - - if (convertGeneIDs) { - require(biomaRt) - e2g <- getBM(attributes=c(geneRowNames,speciesSymbol), - mart=mart,filters=geneRowNames, - values=rownames(nge)) - e2g <- e2g[e2g[,speciesSymbol] != "",] # removing unmapped gene symbols from conversion table - print(paste(sum(duplicated(e2g[,geneRowNames])),geneRowNames,"mapped to multiple",speciesSymbol)) - ## Arbitrarily picking one mapping for the above duplicates, since these generally map to predicted genes anyway. - e2g <- e2g[!duplicated(e2g[,geneRowNames]),] - rownames(e2g) <- e2g[,geneRowNames] - nge <- nge[e2g[,geneRowNames],] # removing unmapped genes from data - print(paste(sum(duplicated(e2g[,speciesSymbol])),speciesSymbol,"mapped to multiple",geneRowNames)) - ## Going to collapse these by summing UMI counts between duplicated rows. - temp_r <- nge[e2g[,speciesSymbol] %in% e2g[,speciesSymbol][duplicated(e2g[,speciesSymbol])],] - nge <- nge[!e2g[,speciesSymbol] %in% e2g[,speciesSymbol][duplicated(e2g[,speciesSymbol])],] - ## Removed duplicated rows from data, saved as separate object - rownames(nge) <- e2g[rownames(nge),speciesSymbol] # renamed rows in data as gene symbols - temp_r <- t(sapply(e2g[,speciesSymbol][duplicated(e2g[,speciesSymbol])],function(X) - colSums(temp_r[e2g[,geneRowNames][e2g[,speciesSymbol] == X],]))) - ## Collapsed by summing each duplicated gene symbol's row - nge <- rbind(nge,temp_r) # added those data back to matrix - } - - - if (!any(grepl("cycle|phase|G2M",colnames(inD@meta.data),ignore.case=T))) { - data("cc.genes") - inD <- CellCycleScoring(inD,g2m.genes=cc.genes$g2m.genes,s.genes=cc.genes$s.genes) - inD@meta.data$Phase <- factor(inD@meta.data$Phase,levels=c("G1","S","G2M")) # So that the phases are in order. - rm(cc.genes) - } - ## ^ If necessary, Seurat has a function to predict cell cycle phase from expression of canonical marker genes. - ## These are stored as HGNC symbols, but if your data is mouse it will try case-insensitive matches to homologues - ## (in which case you will see a warning in AddModuleScore indicating that it attempted to match case). - - md <- inD@meta.data[,!grepl("res\\.[0-9]",colnames(inD@meta.data))] - ## ^ metadata for cells (dataframe of cells) - - if (is.data.frame(inD@meta.data[,grepl("res\\.[0-9]",colnames(inD@meta.data))])) { - cl <- data.frame(lapply(inD@meta.data[,grepl("res\\.[0-9]",colnames(inD@meta.data))],as.factor)) - } else { - cl <- data.frame(inD@meta.data[,grepl("res\\.[0-9]",colnames(inD@meta.data))]) - colnames(cl) <- grep("res\\.[0-9]",colnames(inD@meta.data),value=T) - } - rownames(cl) <- rownames(md) - ## ^ cluster assignments per clustering resolution (dataframe: cells x cluster labels as factors) - - if (length(inD@calc.params) == 0) { - dr_clust <- inD@dr$pca@cell.embeddings - } else { - dr_clust <- inD@dr$pca@cell.embeddings[,inD@calc.params$RunTSNE$dims.use] - } - ## ^ cell embeddings in low-dimensional space used for clustering distances (matrix: cells x dimensions) - ## Only including those dimensions used in downstream analysis (ie. those passed to RunTSNE and FindClusters) - ## if that information is present (in calc.params). Else, using all lower dimensions available. - - dr_viz <- inD@dr$tsne@cell.embeddings - ## ^ cell embeddings in 2D space for visualization (usually tSNE) (matrix: cells x coordinates) - - rm(inD) -} else { - warning(" -Currently only Seurat objects are supported for auto-loading. -You will have to manually copy the data and metadata to the relevant objects. -See code above for details." - ) -} - -CGS <- deTissue <- deVS <- deMarker <- deDist <- deNeighb <- list() - -## This loop iterates through every cluster solution, and does DE testing between clusters -## to generate the DE metrics for assessing your clusters. This takes some time. -## If your cluster solutions are ordered in increasing resolution/number of clusters, -## you can uncomment a line of code at the end of this loop to have it exit if there -## are no longer differentially expressed genes between clusters. Note that this will -## mean that any cluster solutions not run through this loop won't show up in the GUI. - -for (res in colnames(cl)) { - #### Precalculate stats for viz tool #### - print("") - print("") - print(paste("Calculating cluster gene summary statistics for",res)) - print("-- Gene detection rate per cluster --") - DR <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) sum(Y>0)/length(Y))) - print("-- Mean detected gene expression per cluster --") - MDTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) { - temp <- meanLogX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - print("-- Mean gene expression per cluster --") - MTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],meanLogX)) - CGS[[res]] <- sapply(levels(cl[,res]),function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - - #### deTissue - DE per cluster vs all other data #### - print("") - print(paste("Calculating DE vs tissue for",res,"with",length(levels(cl[,res])),"clusters")) - print("-- logGER calculations --") - deT_logGER <- pbsapply(levels(cl[,res]),function(i) - MTC[i,] - apply(nge[,cl[,res] != i],1,meanLogX)) - deT_genesUsed <- apply(deT_logGER,2,function(X) which(X > logGERthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logGERthresh should be set to less than ", - min(apply(deT_logGER,2,function(X) max(abs(X)))), - ", the largest magnitude logGER between cluster ", - names(which.min(apply(deT_logGER,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - print("-- Wilcoxon rank sum calculations --") - deT_pVal <- pbsapply(levels(cl[,res]),function(i) - apply(nge[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[cl[,res] == i],X[cl[,res] != i])$p.value),simplify=F) - deTissue[[res]] <- sapply(levels(cl[,res]),function(i) - data.frame(logGER=deT_logGER[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,deTissue[[res]])$pVal,"fdr"), - rep(names(sapply(deTissue[[res]],nrow)),sapply(deTissue[[res]],nrow)),c) - for (i in names(deTissue[[res]])) { - deTissue[[res]][[i]] <- deTissue[[res]][[i]][tempQval[[i]] <= WRSTalpha,] - deTissue[[res]][[i]]$qVal <- tempQval[[i]][tempQval[[i]] <= WRSTalpha] - } - - #### deMarker - DE per cluster vs each other cluster #### - combos <- combn(levels(cl[,res]),2) - colnames(combos) <- apply(combos,2,function(X) paste(X,collapse="-")) - print("") - print(paste("Calculating marker DE for",res,"with",ncol(combos),"combinations of clusters")) - deM_dDR <- apply(combos,2,function(i) DR[i[1],] - DR[i[2],]) - deM_logGER <- apply(combos,2,function(i) MTC[i[1],] - MTC[i[2],]) - deM_genesUsed <- switch(threshType, - dDR=apply(deM_dDR,2,function(X) which(abs(X) > dDRthresh)), - logGER=apply(deM_logGER,2,function(X) which(abs(X) > logGERthresh))) - if (any(sapply(deM_genesUsed,length) < 1)) { - stop("Gene filtering threshold is set too high.") - } - deM_pVal <- pbsapply(colnames(combos),function(i) - apply(nge[deM_genesUsed[[i]],],1,function(X) - wilcox.test(X[cl[,res] == combos[1,i]], - X[cl[,res] == combos[2,i]])$p.value),simplify=F) - temp_deVS <- sapply(colnames(combos),function(i) - data.frame(dDR=deM_dDR[deM_genesUsed[[i]],i],logGER=deM_logGER[deM_genesUsed[[i]],i], - pVal=deM_pVal[[i]])[order(deM_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,temp_deVS)$pVal,"fdr"), - rep(names(sapply(temp_deVS,nrow)),sapply(temp_deVS,nrow)),c) - for (i in names(temp_deVS)) { temp_deVS[[i]]$qVal <- tempQval[[i]] } - - deVS[[res]] <- sapply(levels(cl[,res]),function(i) { - combos <- strsplit(names(temp_deVS),"-") - temp <- list() - for (X in seq_along(combos)) { - if (! i %in% combos[[X]]) { - next - } else if (which(combos[[X]] == i) == 1) { - temp[[combos[[X]][2]]] <- temp_deVS[[X]][temp_deVS[[X]][,threshType] > 0 & - temp_deVS[[X]]$qVal <= WRSTalpha,] - } else if (which(combos[[X]] == i) == 2) { - temp[[combos[[X]][1]]] <- temp_deVS[[X]][temp_deVS[[X]][,threshType] < 0 & - temp_deVS[[X]]$qVal <= WRSTalpha,] - temp[[combos[[X]][1]]]$dDR <- temp[[combos[[X]][1]]]$dDR * -1 - temp[[combos[[X]][1]]]$logGER <- temp[[combos[[X]][1]]]$logGER * -1 - } - } - return(temp) - },simplify=F) - - deMarker[[res]] <- sapply(deVS[[res]],function(X) { - markerGenes <- Reduce(intersect,lapply(X,rownames)) - temp <- sapply(X,function(Y) Y[markerGenes,c("dDR","logGER","qVal")],simplify=F) - names(temp) <- paste("vs",names(temp),sep=".") - return(do.call(cbind,temp)) - },simplify=F) - - ### deNeighb - DE between nearest neighbouring clusters #### - deDist[[res]] <- sapply(names(deVS[[res]]),function(X) sapply(names(deVS[[res]]),function(Y) - if (X == Y) { return(NA) } else { min(nrow(deVS[[res]][[X]][[Y]]),nrow(deVS[[res]][[Y]][[X]])) })) - nb <- colnames(deDist[[res]])[apply(deDist[[res]],1,which.min)] - names(nb) <- colnames(deDist[[res]]) - ## Nearest neighbour determined by number of DE genes between clusters. - - deNeighb[[res]] <- mapply(function(NB,VS) VS[[NB]][,c("dDR","logGER","qVal")], - NB=nb,VS=deVS[[res]],SIMPLIFY=F) - for (i in names(deNeighb[[res]])) { - colnames(deNeighb[[res]][[i]]) <- paste("vs",nb[i],colnames(deNeighb[[res]][[i]]),sep=".") - } - - if (min(sapply(deNeighb[[res]],nrow)) < 1) { break } - ## Uncomment the above line to exit the loop if there is no differentially expressed genes between - ## nearest neighbouring clusters at the current resolution. (This only makes sense if your cluster - ## solutions are ordered in increasing resolution). -} -cl <- cl[names(deNeighb)] # removing cluster solutions not tested above. - -#### Save outputs for visualization #### -save(nge,md,cl,dr_clust,dr_viz, - CGS,deTissue,deMarker,deDist,deNeighb, - file=paste0(outputDirectory, - sub("^.*/","",sub("\\.[A-Za-z0-9]+$","",get(grep("^dataRD",ls(),value=T)))), - "_forViz.RData")) -## ^ Saved objects for use in visualization script (RunVizScript.R). - -save(deVS,file=paste0(outputDirectory, - sub("^.*/","",sub("\\.[A-Za-z0-9]+$","",get(grep("^dataRD",ls(),value=T)))), - "_deVS.RData")) -## ^ All pairwise DE test results between clusters. - - diff --git a/ToBeConvertedToPkg/README_withDemoLinksThatDontWork.md b/ToBeConvertedToPkg/README_withDemoLinksThatDontWork.md deleted file mode 100644 index aab588e..0000000 --- a/ToBeConvertedToPkg/README_withDemoLinksThatDontWork.md +++ /dev/null @@ -1,49 +0,0 @@ -# scClustViz -An interactive R Shiny tool for visualizing single-cell RNAseq clustering results from the *Seurat* R package or any other analysis pipeline. Its main goal is two-fold: **A:** to help select a biologically appropriate resolution or K from clustering results by assessing differential expression between the resulting clusters; and **B:** help annotate cell types and identify marker genes. You can check out an [online demo here](https://innesbt.shinyapps.io/e11cortex/) using data from [Yuzwa *et al.*'s 2017 Cell Reports paper](https://doi.org/10.1016/j.celrep.2017.12.017/). - -- [scClustViz Usage](#scclustviz-usage) - - [Setup](#setup) - - [Run](#run) - - [Demos](#demos) -- [Cell Reports 2017](#cell-reports-2017) -- [scRNAseq analysis pipeline](#scrnaseq-analysis-pipeline) -- [Contact](#contact) - -## scClustViz Usage -scClustViz is distributed as a collection of R scripts rather than a package for ease of customization and integration into your existing analysis pipeline. Running the visualization tool requires a one-time setup step, then it's just a matter of running a single R script. - -### Setup -scClustViz assumes you have tried a variety of parameterizations when clustering the cells from your scRNAseq data, and want to decide which clustering solution you should use (if you haven't yet clustered your data, or are interested in an example of integrating the differential expression metric used in this tool to systematically test different clustering resolutions, see the example [pipeline below](#scrnaseq-analysis-pipeline)). -The setup step does the differential expression testing for all cluster solutions, and saves it in the file format necessary for the visualization to run. To perform the setup, download and run [PrepareInputs.R](PrepareInputs.R). Note that you will need to change some of the variables in the script to reflect your data/computer, as well as installing any missing libraries. This script is designed to take your *Seurat* output and generate all the differential expression information necessary for the visualization tool, which will be saved in a directory of your choosing. If you have your analysis outputs in another format (i.e. Bioconductor's SingleCellExperiment class), you can use the code used to pull the relevant bits out of the *Seurat* object as a template for loading your data into the visualization tool. I aim to include automatic loading for the SingleCellExperiment class shortly, but in the meantime you can check out [iSEE](https://bioconductor.org/packages/release/bioc/html/iSEE.html), which is a GUI designed specifically for Bioconductor. - -### Run -After you've run PrepareInputs.R, download [RunVizScript.R](RunVizScript.R) and [app.R](app.R). You will again need to change some variables and install any missing libraries in RunVizScript.R. You shouldn't need to touch app.R unless you're interested in modifying the Shiny visualization itself. Running RunVizScript.R will load your data into the visualization software, and will open the Shiny UI in a web browser. Have fun exploring your data! - -### Demos -These are .RData files ready to be run in scClustViz by downloading and pointing RunVizScript.R to the file path. -- [1000 cells from E18 mouse brain (10X Genomics)](demo/10Xneurons_forViz.RData). This is the output file of the example processing pipeline outlined below, [using data from 10X Genomics](https://support.10xgenomics.com/single-cell-gene-expression/datasets/2.1.0/neurons_900). -- [Mouse embryonic cerebral cortex](#cell-reports-2017) data from timepoints spanning neurogenesis are available below. - - -## Cell Reports 2017 -The data from the 2017 Cell Reports paper [Developmental Emergence of Adult Neural Stem Cells as Revealed by Single-Cell Transcriptional Profiling](https://doi.org/10.1016/j.celrep.2017.12.017) by Yuzwa *et al.* are available to explore using online instances of the scClustViz tool using the links below: -- [E11.5 Cerebral Cortex](https://innesbt.shinyapps.io/e11cortex/) - [file for download](meCortex/e11/e11_Cortical_Only_forViz.RData) -- [E13.5 Cerebral Cortex](https://innesbt.shinyapps.io/e13cortex/) - [file for download](meCortex/e13/e13_Cortical_Only_forViz.RData) -- [E15.5 Cerebral Cortex](https://innesbt.shinyapps.io/e15cortex/) - [file for download](meCortex/e15/e15_Cortical_Only_forViz.RData) -- [E17.5 Cerebral Cortex](https://innesbt.shinyapps.io/e17cortex/) - [file for download](meCortex/e17/e17_Cortical_Only_forViz.RData) - -These are DropSeq data from timepoints spanning neurogenesis and filtered for cortically-derived cells, processed on an earlier version of the pipeline and imported into scClustViz using PrepareInputs. - - -## scRNAseq analysis pipeline -I've also included my basic pipeline for scRNAseq analysis from data generated by either DropSeq (processed using the DropSeq cookbook) or 10X Genomics Chromium (processed using 10X Genomics CellRanger). The pipeline is broken into two steps, implemented as R notebooks in RStudio. The quality control and normalization steps are based to some extent on the [workflow](http://dx.doi.org/10.12688/f1000research.9501.2) proposed by the Marioni group using their [*scran* package](http://bioconductor.org/packages/release/bioc/html/scran.html), and the clustering is done using the algorithm implemented in the Satija lab's [*Seurat* package](https://satijalab.org/seurat/). I've set the pipeline up as R notebooks so that they generate convenient reports as they run. Here's an example using [GSM2861514](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2861514), the E17.5 mouse cerebral cortex data from [Yuzwa *et al.*'s 2017 Cell Reports paper](https://doi.org/10.1016/j.celrep.2017.12.017/). -**Note that if you use this pipeline to process your data, you can load the output straight into RunVizScript.R and skip the PrepareInputs.R processing step.** -- [Quality Control & Normalization](pipeline/pipeline_QCN.md) or a [pdf report](pipeline/pipeline_QCN.pdf), and download the [R notebook](pipeline/pipeline_QCN.Rmd) (RStudio required). If you're loading DropSeq data, download the [helper bash script](pipeline/DropSeqDGEhelper.sh) which prepares the digital gene expression matrix for easy loading in R by stripping and saving column (cell) and row (gene) names. -- [Clustering and DE analysis](pipeline/pipeline_Clust.md) or a [pdf report](pipeline/pipeline_Clust.pdf), and download the [R notebook](pipeline/pipeline_Clust.Rmd) (RStudio required). - - -### Contact -You can [contact me](http://www.baderlab.org/BrendanInnes) for questions about this repo. For general scRNAseq questions, do what I do and [ask the Toronto single-cell RNAseq working group on Slack](http://bit.ly/scRNAseqTO)! - - - diff --git a/ToBeConvertedToPkg/RunVizScript.R b/ToBeConvertedToPkg/RunVizScript.R deleted file mode 100644 index 455dbb9..0000000 --- a/ToBeConvertedToPkg/RunVizScript.R +++ /dev/null @@ -1,153 +0,0 @@ -######## User-defined variables ######## - -dataPath <- "meCortex/e13/e13_Cortical_Only_forViz.RData" -## ^ Point this to the output file from PrepareInputs.R -## If you set a default resolution in the Shiny app, it will save to the same directory. - -vizScriptPath <- "./" -## ^ Point this to the directory in which the "app.R" Shiny script resides - -species <- "mouse" -## ^ Set species ("mouse"/"human"). -## If other, add the annotation database from Bioconductor to the egDB <- switch() expression below. - - -#### List known cell-type markers #### -cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna","Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b","Tle4", - "Nes","Cux1","Cux2","Tubb3","Rbfox3","Dcx")) -#cellMarkers <- list() -## ^ If you have canonical marker genes for expected cell types, list them here -## (see example above from mouse embryonic cortex). The Shiny app will attempt -## to label clusters in the tSNE projection by highest median gene expression. -## Otherwise leave the list blank (uncomment line above). - - -#### Variables for differential expression analysis #### -exponent <- 2 -## ^ log base of your normalized input data. -## Seurat defaults to natural log (set this to exp(1)), -## other methods are generally log2 (set this to 2). -pseudocount <- 1 -## ^ pseudocount added to all log-normalized values in your input data. -## Most methods use a pseudocount of 1 to eliminate log(0) errors. - -#threshType <- "logGER" # use a fold-change-based threshold for filtering genes prior to DE testing -threshType <- "dDR" # use a difference in detection rate threshold for filtering -## Filtering genes for use in differential expression testing can be done multiple ways. -## We use a fold-change filter for comparing each cluster to the tissue as a whole, but find that -## difference in detection rates works better when comparing clusters to each other. You can set -## threshType to "logGER" to use fold-change for all gene filtering if you'd prefer. - -logGERthresh <- 1 # magnitude of mean log-expression fold change between clusters to use as filter. -dDRthresh <- 0.15 # magnitude of detection rate difference between clusters to use as filter. -WRSTalpha <- 0.01 # significance level for DE testing using Wilcoxon rank sum test - - -######################################## - - - -######## Code to run the Shiny app ######## -library(markdown) -library(shiny) -library(cluster) -library(gplots) -library(scales) -library(viridis) -library(RColorBrewer) -library(TeachingDemos) - -egDB <- switch(species, - mouse={ requireNamespace(org.Mm.eg.db); "org.Mm.eg.db" }, - human={ requireNamespace(org.Hs.eg.db); "org.Hs.eg.db" }, - stop(" -Set species please! -If not mouse/human, add your species' annotation database from Bioconductor: -source('https://bioconductor.org/biocLite.R') -biocLite('org.Xx.eg.db') -")) - -meanLogX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -if (length(cellMarkers) < 1) { - cellMarkersS <- cellMarkersU <- list() -} else { - cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2, - function(X) do.call(intersect,unname(cellMarkers[X]))) - try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2, - function(X) paste(X,collapse="&")),silent=T) - cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] - cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) -} - -demoRegex <- switch(species,mouse="^Actb$",human="^ACTB$") - - -load(dataPath) -temp_dataPath <- strsplit(dataPath,"/|\\\\") -dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",dataPath) -if (dataPath == "") { dataPath <- "./" } -dataTitle <- sub("\\..+$|_forViz\\..+$","",temp_dataPath[[1]][length(temp_dataPath[[1]])]) -rm(temp_dataPath) - -for (selDEfile in grep(paste0("^",dataTitle,".+selDE.+RData$"),list.files(dataPath),value=T)) { - temp <- load(paste0(dataPath,selDEfile)) - cl <- cbind(cl,new_cl) - CGS <- append(CGS,new_CGS) - deTissue <- append(deTissue,new_deTissue) - deMarker <- append(deMarker,new_deMarker) - rm(list=temp) -} - -if (file.exists(paste0(dataPath,dataTitle,"_savedRes.RData"))) { - load(paste0(dataPath,dataTitle,"_savedRes.RData")) -} else { - savedRes <- NULL -} - -if (!file.exists(paste0(dataPath,"intro.md"))) { - write(paste0(dataTitle,": You can add to this preamble by editting ",dataPath,"intro.md"), - file=paste0(dataPath,"intro.md")) -} - -silDist <- dist(dr_clust) -## ^ precalculating distances in reduced dimensionality space for the silhouette plot. - -for (l in names(CGS)) { - for (i in names(CGS[[l]])) { - CGS[[l]][[i]]$MTCrank <- rank(CGS[[l]][[i]]$MTC,ties.method="min")/nrow(CGS[[l]][[i]]) - if (i == "Unselected") { next } - CGS[[l]][[i]]$cMu <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersU) - CGS[[l]][[i]]$cMs <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersS) - CGS[[l]][[i]]$overCut <- CGS[[l]][[i]]$MTC > mean(CGS[[l]][[i]]$MTC) - CGS[[l]][[i]]$genes <- rownames(CGS[[l]][[i]]) - } -} - -if (length(cellMarkers) < 1) { - clusterID <- sapply(names(CGS),function(X) sapply(CGS[[X]],function(Z) return("")),simplify=F) -} else if (!any(unlist(cellMarkers) %in% rownames(nge))) { - warning(paste("None of the provided cellMarkers are found in the data", - "(check your gene IDs against rownames in your data).")) - clusterID <- sapply(names(CGS),function(X) sapply(CGS[[X]],function(Z) return("")),simplify=F) -} else { - clusterID <- sapply(CGS,function(Z) { - temp <- names(cellMarkers)[sapply(Z,function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))] - names(temp) <- names(Z) - temp[names(temp) == "Unselected"] <- "Unselected" - return(temp) - },simplify=F) -} - -#### Run the Shiny App! #### -runApp(vizScriptPath) diff --git a/ToBeConvertedToPkg/app.R b/ToBeConvertedToPkg/app.R deleted file mode 100644 index ab1f752..0000000 --- a/ToBeConvertedToPkg/app.R +++ /dev/null @@ -1,1850 +0,0 @@ -########## UI ########## -ui <- fixedPage( - fixedRow( - titlePanel(paste("scClustViz -",dataTitle)), - includeMarkdown(paste0(dataPath,"intro.md")) - ), - hr(), - - ######## Clustering Solution Selection ######## - fixedRow( - titlePanel("Clustering Solution Selection"), - p(paste("Here you can compare the results of clustering at different resolutions to", - "determine the appropriate clustering solution for your data. You can see the", - "cluster solutions represented as boxplots on the left, where each boxplot", - "represents the number of genes differentially expressed between each cluster", - "and its nearest neighbour, or marker genes per cluster. The cluster selected", - "in the pulldown menu is highlighted in red, and the silhouette plot for that", - "cluster is shown on the right.")), - p(paste("A silhouette plot is a horizontal barplot where each bar is a cell, grouped by", - "cluster. The width of each bar represents the difference between mean distance", - "to other cells within the cluster and mean distance to cells in the nearest", - "neighbouring cluster. Distance is Euclidean in reduced dimensional space.", - "Positive silhouettes indicate good cluster cohesion.")), - p(paste("Once you've selected an appropriate cluster solution (we suggest picking one", - "where all nearest neighbouring clusters have differentially expressed genes", - "between them), click 'View clusters at this resolution' to proceed. If you", - "want to save this cluster solution as the default for next time, click 'Save", - "this resolution as default'. All figures can be downloaded as PDFs by clicking", - "the buttons next to each figure.")), - h1() - ), - fixedRow( - column(6, - fixedRow(column(6,uiOutput("resSelect"),align="left"), - column(6,align="right", - actionButton("go","View clusters at this resolution",icon("play"), - style="color: #fff; background-color: #008000"), - uiOutput("saveButton") - ) - ), - radioButtons("deType",NULL,list("# of DE genes to nearest neighbouring cluster"="deNeighb", - "# of marker genes per cluster"="deMarker"),inline=T), - plotOutput("cqPlot",height="500px")), - column(6,plotOutput("sil",height="600px")) - ), - fixedRow( - column(6,downloadButton("cqPlotSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") - ), - hr(), - - ######## Dataset and Cluster Metadata Inspection ######## - fixedRow( - titlePanel("Dataset and Cluster Metadata Inspection"), - p(paste("Here you can explore your dataset as a whole: cluster assignments for all", - "cells; metadata overlays for cell projections; and figures for comparing", - "both numeric and categorical metadata. The top two figures show cells", - "projected into 2D space, where proximity indicates transcriptional similarity.", - "On the left you can see cluster assignments and the nearest neighbours used in", - "the differential expression calculations. If cell type marker genes were", - "provided in RunVizScript.R, it will also show predicted cell type annotations.", - "On the right you can add a metadata overlay to the cell projection. Below", - "you can view relationships in the metadata as a scatterplot or compare clusterwise", - "distributions of metadata as bar- or box-plots. If you select a cluster of interest", - "(by clicking on a cell in the top-left plot, or from the list two sections down)", - "it will be highlighted for comparison in these figures.")), - strong(paste("You can select any cluster for further assessment by clicking on a cell", - "from that cluster in the top-left figure.")), - h1() - ), - fixedRow( - column(6, - if (length(cellMarkers) > 0 & !all(unlist(clusterID) == "")) { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn", - "Cluster annotations"="ca", - "Cluster annotations (label all)"="can")) - } else { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn")) - }, - checkboxInput("nnArrow",value=F,width="100%", - label="Show nearest neighbouring clusters by # of DE genes.") - ), - - column(4,selectInput("tsneMDcol",label="Metadata:",width="100%",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(2,uiOutput("tsneMDlog")) - ), - fixedRow( - column(6,plotOutput("tsne",height="580px",click="tsneClick")), - column(6,plotOutput("tsneMD",height="580px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("tsneMDSave","Save as PDF")) - ), - hr(), - - fixedRow( - column(2,selectInput("mdScatterX","X axis:",choices=colnames(md),selected="total_counts")), - column(2,selectInput("mdScatterY","Y axis:",choices=colnames(md),selected="total_features")), - column(2,uiOutput("scatterLog")), - - column(3,selectInput("mdFactorData","Metadata:",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(3,uiOutput("mdFactorOpts")) - ), - fixedRow( - column(6,plotOutput("mdScatter",height="560px")), - column(6,plotOutput("mdFactor",height="560px")) - ), - fixedRow( - column(6,align="left",downloadButton("mdScatterSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) - ), - hr(), - - ######## Differentially Expressed Genes per Cluster ######### - fixedRow( - titlePanel("Differentially Expressed Genes per Cluster"), - p(HTML(paste("Here you can explore the significantly differentially expressed genes per", - "cluster. 'DE vs Rest' refers to positively differentially expressed genes", - "when comparing a cluster to the rest of the cells as a whole. 'Marker genes'", - "refers to genes positively differentially expressed versus all other clusters", - "in a series of pairwise tests. 'DE vs neighbour' refers to genes positively", - "differentially expressed versus the nearest neighbouring cluster, as measured", - "by number of differentially expressed genes between clusters. In all cases,", - "Wilcoxon rank-sum tests are used, with a",percent(WRSTalpha),"false detection", - "rate threshold."))), - p(paste("The heatmap is generated using the differentially expressed genes from the test", - "and number of genes selected below. Differentially expressed gene lists can be", - "downloaded as tab-separated text files by selecting the test type and cluster,", - "and clicking 'Download gene list'. Genes used in the heatmap can be viewed in", - "the gene expression plots below as well.")), - h1() - - ), - - fixedRow( - column(2,uiOutput("heatDEtype")), - column(6,uiOutput("DEgeneSlider")), - column(2,uiOutput("DEclustSelect")), - column(2,downloadButton("deGeneSave","Download gene list"), - downloadButton("heatmapSave","Save as PDF"),align="right") - ), - fixedRow(plotOutput("heatmap",height="640px")), - hr(), - - ######### Gene Expression Distributions per Cluster ######### - fixedRow( - titlePanel("Gene Expression Distributions per Cluster"), - p(paste("Here you can investigate the expression of individual genes per cluster and", - "across all clusters. The first plot shows mean expression of genes in a cluster", - "as a function of their detection rate and transcript count when detected. The", - "x-axis indicates the proportion of cells in the cluster in which each gene was", - "detected (transcript count > 0), while the y-axis shows the mean normalized", - "transcript count for each gene from the cells in the cluster in which that gene", - "was detected. You can select the cluster to view from the menu below, and genes", - "can be labelled in the figure based on the cell-type markers provided in", - "RunVizScipt.R, the differentially expressed genes from the selected cluster in", - "the above heatmap, or by searching for them in the box below the figure.")), - p(paste("Clicking on the first plot will populate the list of genes near the point clicked,", - "which can be found above the next figure. By selecting a gene from this list,", - "you can compare the expression of that gene across all clusters in the second figure.", - "This list can also be populated using the gene search feature. Plotting options", - "for the second figure include the option to overlay normalized transcript count", - "from each cell in the cluster over their respective boxplots ('Include scatterplot'),", - "and the inclusion of the percentile rank of that gene's expression per cluster as", - "small triangles on the plot using the right y-axis ('Include gene rank').")), - h1() - ), - fixedRow( - column(3,uiOutput("genePlotClustSelect")), - column(9,if (length(cellMarkers) > 0) { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Cell-type markers"="markers", - "Top DE genes (from heatmap)"="heatmap", - "Gene symbols from search box below"="search")) - } else { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Top DE genes (from heatmap)"="heatmap", - "Gene symbols from search box below"="search")) - }) - ), - fixedRow(align="right", - plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") - ), - - #### Gene expression comparison #### - fixedRow( - column(3,radioButtons("searchType",label="Search by:", - choices=c("Gene list (comma-separated)"="comma", - "Regular expression"="regex"))), - column(8,uiOutput("geneSearchBox")), - column(1,actionButton("GOIgo","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOIgo { margin-top: 25px; margin-left: -25px; }"), - fixedRow( - column(2,uiOutput("cgSelect")), - column(5,radioButtons("boxplotGene",inline=T, - label="Genes of interest (to populate list):", - choices=c("From click on plots above or below"="click", - "From gene search"="search"))), - column(5,checkboxGroupInput("bxpOpts",label="Figure options:", - selected=c("sct","rnk","notch"),inline=T, - choices=list("Include scatterplot"="sct", - "Include gene rank"="rnk", - "Show notch"="notch"))) - ), - fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") - ), - hr(), - - ######## Cluster comparison ######### - fixedRow( - titlePanel("Cluster/Set Comparison of Gene Statistics"), - p(HTML(paste("Here you can directly compare gene expression statistics between clusters.", - "Any clusters from the currently selected cluster solution can be compared,", - "and you can switch cluster solutions from the menu here for convenience.", - "The stats that can be compared are mean normalized transcript count per", - "cluster (Mean gene expression), proportion of cells in a cluster in", - "which each gene was detected (Detection rate), and mean normalized", - "transcript count in cells of the cluster in which the gene was detected", - "(Mean detected gene expression). Genes can be labelled based on", - "differential expression from the heatmap above, or using the gene search", - "feature above."))), - p(paste("The most different genes in the current comparison can also be labelled. This", - "calculation can simply be subtracting the gene stat of the x-axis cluster from", - "that of the y-axis, or distance (residual) from the line of best fit. The latter", - "calculation may be of value if there is concern that a technical factor such as", - "library size is confounding a direct comparison between clusters. In either case,", - "the resulting values can be downloaded as a ranked list where positive values are", - "higher in the cluster on the y-axis, and negative values are higher in the x-axis", - "cluster. Since this list ranks all genes in the experiment, it could be used as an", - "input for GSEA.")), - h1() - ), - fixedRow( - column(7,plotOutput("setScatter",height="640px",click="scatterClick")), - column(5, - fixedRow( - column(10,uiOutput("resSelect2")), - column(2,actionButton("go2","View",icon("play"), - style="color: #fff; background-color: #008000")) - ), - fixedRow(column(12,uiOutput("saveButton2"))), - fixedRow( - column(6,uiOutput("setScatterY")), - column(6,uiOutput("setScatterX")) - ), - fixedRow( - column(7,radioButtons("scatterInput",label="Gene stat to display:", - choices=c("Mean gene expression"="MTC", - "Detection rate"="DR", - "Mean detected gene expression"="MDTC"))), - column(5,radioButtons("scatterLine",label="Difference calculation:", - choices=c("Subtraction"="sub","From line of best fit"="lbf"))) - ), - fixedRow(column(12,radioButtons("diffLabelType",label="Label genes by:", - choices=c("Most different by calculation"="diff", - "Top DE genes (from heatmap)"="de", - "Genes symbols from search box above"="search"), - inline=T))), - fixedRow(column(12,uiOutput("diffLabelSelect"))), - fixedRow( - column(4,checkboxInput("scatterLabelAngle",label="Flip label angle",value=F)), - column(4,downloadButton("setScatterSave","Save as PDF"),align="right"), - column(4,downloadButton("setComparisonSave","Download ranked list")) - ) - ) - ),tags$style(type='text/css',paste("button#go2 { margin-top: 25px; margin-left: -25px; }", - "button#updateForViz2 { margin-top: -25px; }")), - - hr(), - - ######## Custom sets for DE ######### - fixedRow(titlePanel("Manually Select Cells for DE Testing")), - fixedRow( - column(8,plotOutput("tsneSelDE",brush="tsneBrush",height="750px")), - column(4, - p(paste("Here you can select cells to further explore using the figures above.", - "Click and drag to select cells, and use the buttons below to add them", - "to a set of cells. When your sets are ready, name the comparison and", - "click the 'Calculate differential gene expression' button. Once the", - "calculation is done the comparison will be added to the cluster list", - "at the top of the page and the current cluster solution will be updated", - "to show this comparison. The comparison can be saved by clicking 'Save", - "this comparison to disk' next to either cluster solution menu.")), - hr(), - selectInput("tsneSelDEcol","Metadata overlay:",choices=c("",colnames(md))), - hr(), - column(6,htmlOutput("textSetA"), - actionButton("addCellsA","Set A: Add Cells",icon("plus"), - style="color: #fff; background-color: #a50026"), - actionButton("removeCellsA","Set A: Remove Cells",icon("minus"), - style="color: #a50026; background-color: #fff; border-color: #a50026") - ), - column(6,htmlOutput("textSetB"), - actionButton("addCellsB","Set B: Add Cells",icon("plus"), - style="color: #fff; background-color: #313695"), - actionButton("removeCellsB","Set B: Remove Cells",icon("minus"), - style="color: #313695; background-color: #fff; border-color: #313695") - ), - htmlOutput("textOverlap"), - hr(), - textInput("DEsetName","Short name for this comparison:", - placeholder="A-z0-9 only please"), - actionButton("calcDE","Calculate differential gene expression",icon("play")), - hr(), - span(textOutput("calcText"),style="color:red") - ) - ), - hr(), - - ######## Distribution of genes of interest ######### - fixedRow( - titlePanel("Cell Distribution of Genes of Interest"), - p(paste("Here you can overlay gene expression values for individual genes of interest", - "on the cell projection. Search for your gene using the search box below,", - "then select your gene(s) of interest from the dropdown 'Select genes' menu.", - "You can select multiple genes, but note that for each cell only the gene", - "expression of the gene with the highest expression in that cell will be displayed.", - "You have the option to include the cluster labels from the first cell projection", - "figure in these plots, and to colour the clusters themselves. There are two", - "copies of this figure for ease of comparison between genes of interest.")), - h1() - ), - fixedRow( - column(2,radioButtons("searchType1",label="Search by:", - choices=c("Gene list"="comma", - "Regular expression"="regex"))), - column(3,uiOutput("geneSearchBox1")), - column(1,actionButton("GOI1go","Search",icon=icon("search"))), - - column(2,radioButtons("searchType2",label="Search by:", - choices=c("Gene list"="comma", - "Regular expression"="regex"))), - column(3,uiOutput("geneSearchBox2")), - column(1,actionButton("GOI2go","Search",icon=icon("search"))) - ),tags$style(type='text/css', paste("button#GOI1go { margin-top: 25px; margin-left: -25px; }", - "button#GOI2go { margin-top: 25px; margin-left: -25px; }")), - - fixedRow( - column(3, - radioButtons("plotClust1",inline=T,label="Plot:",selected="goi", - choices=list("Clusters"="clust","Gene expression overlay"="goi")), - checkboxInput("plotLabel1",label="Include cluster labels (style as above)",value=T) - ), - column(3,uiOutput("GOI1select")), - - column(3, - radioButtons("plotClust2",inline=T,label="Plot:",selected="goi", - choices=list("Clusters"="clust","Gene expression overlay"="goi")), - checkboxInput("plotLabel2",label="Include cluster labels (style as above)",value=T) - ), - column(3,uiOutput("GOI2select")) - ), - - fixedRow( - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")), - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")) - ), - fixedRow( - column(6,plotOutput("goiPlot1",height="600px")), - column(6,plotOutput("goiPlot2",height="600px")) - ), - fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) - ), - h1() -) - - -########## Server ########## -server <- function(input,output,session) { - d <- reactiveValues(cl=cl,CGS=CGS, - clusterID=clusterID, - deTissue=deTissue, - deMarker=deMarker) - - clustCols <- function(res) { - if (grepl("^Comp",res)) { - c(brewer.pal(3,"PRGn")[c(1,3)],"grey80") - } else if (length(levels(d$cl[,res])) <= 8) { - brewer.pal(length(levels(d$cl[,res])),"Dark2")[1:length(levels(d$cl[,input$res]))] - } else { - rainbow2(length(levels(d$cl[,res]))) - } - } - - - ######## Cluster Resolution Selection ######## - #### Inter-cluster DE boxplots #### - numClust <- sapply(cl[,!grepl("^Comp",colnames(cl))],function(X) length(levels(X))) - clustList <- reactive({ - temp <- as.list(colnames(d$cl)) - names(temp)[seq_along(numClust)] <- paste0(unlist(temp)[seq_along(numClust)], - ": ",numClust," clusters") - if (length(temp) > length(numClust)) { - names(temp)[setdiff(seq_along(temp),seq_along(numClust))] <- - paste0("Comparison: ", - sub("Comp.","",fixed=T, - x=unlist(temp)[setdiff(seq_along(temp),seq_along(numClust))])) - } - return(temp) - }) - output$resSelect <- renderUI({ - if (is.null(res())) { temp_sel <- savedRes} else { temp_sel <- res() } - selectInput("res","Resolution:",choices=clustList(),selected=temp_sel) - }) - output$saveButton <- renderUI({ - if (grepl("^Comp",input$res)) { - actionButton("updateForViz","Save this comparison to disk",icon("save")) - } else { - actionButton("save","Save this resolution as default",icon("bookmark")) - } - }) - numClust <- numClust[numClust > 1] - - plot_cqPlot <- function() { - numDEgenes <- lapply(get(input$deType)[!grepl("^Comp",names(get(input$deType)))], - function(X) sapply(X,nrow)) - toplim <- c(21,max(unlist(numDEgenes)) + 20) - botlim <- c(-1,21) - - if (grepl("^Comp",input$res)) { - par(mar=c(3,3.5,1,1)) - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Press 'View clusters at this resolution'", - "to view the comparison", - sub("Comp.","",input$res,fixed=T),sep="\n")) - } else { - par(mar=c(0.2,3.5,1,1),mgp=2:0,mfrow=c(2,1)) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=toplim,yaxs="i",xaxt="n",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - - par(mar=c(3,3.5,0.2,1),mgp=2:0) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=botlim,yaxs="i",xlab="Number of clusters",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - mtext(switch(input$deType, - "deMarker"="Positive DE genes per cluster to all other clusters", - "deNeighb"="Positive DE genes per cluster to nearest cluster") - ,side=2,line=2.5,at=botlim[2],xpd=NA) - } - } - - output$cqPlot <- renderPlot({ - print(plot_cqPlot()) - }) - - output$cqPlotSave <- downloadHandler( - filename="cqPlot.pdf", - content=function(file) { - pdf(file,width=7,height=6) - print(plot_cqPlot()) - dev.off() - } - ) - - #### Silhouette plot #### - plot_sil <- function() { - tempSil <- silhouette(as.integer(d$cl[,input$res]),dist=silDist) - par(mar=c(4.5,.5,1.5,1.5),mgp=2:0) - if (length(tempSil) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Silhouette plot cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - plot(tempSil,beside=T,border=NA,main=NA,col=clustCols(input$res),do.n.k=T) - } - } - - output$sil <- renderPlot({ - print(plot_sil()) - }) - - output$silSave <- downloadHandler( - filename="sil.pdf", - content=function(file) { - pdf(file,width=6,height=7) - print(plot_sil()) - dev.off() - } - ) - - #### Resolution selection buttons #### - res <- reactiveVal() - observeEvent(input$go,res(input$res),ignoreNULL=F) - observeEvent(input$go2,res(input$res2),ignoreNULL=F) - - observeEvent(input$save,{ - savedRes <<- input$res #<<- updates variable outside scope of function (ie. global environment) - save(savedRes,file=paste0(dataPath,dataTitle,"_savedRes.RData")) - }) - - - ######## Cell-type Clusters ######## - clusts <- reactive(d$cl[,res()]) - - #### Cell-type tSNE #### - plot_tsne_labels <- function() { - if (input$tsneLabels == "ca") { - temp_labelNames <- sapply(unique(d$clusterID[[res()]]),function(X) - names(which(d$clusterID[[res()]] == X)),simplify=F) - temp_labels <- apply(dr_viz,2,function(Y) - tapply(Y,apply(sapply(temp_labelNames,function(X) clusts() %in% X),1,which),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=names(temp_labelNames),font=2,cex=1.5) - } else if (input$tsneLabels == "can") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=d$clusterID[[res()]],font=2,cex=1.5) - } else if (input$tsneLabels == "cn") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=levels(clusts()),font=2,cex=1.5) - } else { - legend("center",legend="You changed the label choice names...") - } - } - - plot_tsne <- function() { - par(mar=c(3,3,4,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs"), - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(clustCols(res())[clusts()],0.2)[!ci()], - bg=alpha(clustCols(res())[clusts()],0.1)[!ci()]) - points(dr_viz[ci(),],pch=21, - col=alpha(clustCols(res())[clusts()],1)[ci()], - bg=alpha(clustCols(res())[clusts()],0.5)[ci()]) - } else { - points(dr_viz,pch=21, - col=alpha(clustCols(res())[clusts()],1), - bg=alpha(clustCols(res())[clusts()],0.5)) - } - if (hiC() != "") { - mtext(side=3,line=-1,text=paste("Cluster",hiC(),"-", - d$clusterID[[res()]][hiC()],"-", - sum(clusts() == hiC()),"cells")) - } - if (input$nnArrow) { - temp_nn <- sapply(deNeighb[[res()]],function(X) - unique(gsub(pattern="^vs\\.|\\.[A-Za-z]+?$","",colnames(X))),simplify=F) - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - sapply(names(temp_nn),function(X) - arrows(lwd=2,col=alpha("black",0.5),length=0.1, - x0=temp_labels[X,1],y0=temp_labels[X,2], - x1=temp_labels[temp_nn[[X]],1],y1=temp_labels[temp_nn[[X]],2])) - } - } - - output$tsne <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsne()) - print(plot_tsne_labels()) - } - }) - - output$tsneSave <- downloadHandler( - filename="tsne.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsne()) - print(plot_tsne_labels()) - dev.off() - } - ) - - #### clusterSelect #### - - - clusterSelect <- reactiveValues(cl=NULL) - - observeEvent(input$tsneClick,{ clusterSelect$cl <- input$tsneClick }) - - cSelected <- reactive({ - t <- nearPoints(as.data.frame(dr_viz),clusterSelect$cl,xvar="tSNE_1",yvar="tSNE_2",threshold=5) - t2 <- d$cl[rownames(t)[1],res()] - if (is.na(t2)) { - return("") - } else if (t2 == "Unselected") { - return("") - } else { - return(t2) - } - }) - - hiC <- reactive({ - if (length(res()) < 1) { - return("") - } else if (input$genePlotClust != "") { - d$cl[which(d$cl[,res()] == input$genePlotClust)[1],res()] - } else { - return(input$genePlotClust) - } - }) - - ci <- reactive({ - if (hiC() == "") { - rep(F,length(clusts())) - } else { - clusts() == hiC() - } - }) - - #### Metadata tSNE overlay #### - output$tsneMDlog <- renderUI({ - if (!(is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol]))) { - checkboxGroupInput("tsneMDlog",label="Colour scale", - choices=c("Log scale"="log"),width="100%") - } - }) - - plot_tsneMD <- function() { - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - id <- as.factor(md[,input$tsneMDcol]) - if (length(levels(md[,input$tsneMDcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneMDcol])), - "Dark2")[1:length(levels(md[,input$tsneMDcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneMDcol]))) - } - } else { - if ("log" %in% input$tsneMDlog) { - id <- cut(log10(md[,input$tsneMDcol]),100) - } else { - id <- cut(md[,input$tsneMDcol],100) - } - idcol <- viridis(100,d=-1) - } - layout(cbind(2:1),heights=c(1,9)) - par(mar=c(3,3,0,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(idcol,.1)[id[!ci()]], - bg=alpha(idcol,0.05)[id[!ci()]]) - points(dr_viz[ci(),],pch=21, - col=alpha(idcol,.8)[id[ci()]], - bg=alpha(idcol,0.4)[id[ci()]]) - } else { - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - } - plot_tsne_labels() - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - par(mar=c(0,0,0,0)) - plot.new() - legend("bottom",bty="n",horiz=T,pch=c(NA,rep(21,length(levels(md[,input$tsneMDcol])))), - legend=c(paste0(input$tsneMDcol,":"),levels(md[,input$tsneMDcol])), - col=c(NA,idcol),pt.bg=c(NA,alpha(idcol,0.5))) - } else { - if ("log" %in% input$tsneMDlog) { - tempMain <- paste(input$tsneMDcol,"(log scale)") - } else { - tempMain <- input$tsneMDcol - } - par(mar=c(0,5,3,3)) - barplot(rep(1,100),space=0,col=idcol,xaxt="n",yaxt="n",border=NA,main=tempMain) - text(x=c(1,100),y=1,pos=c(2,4),xpd=NA,labels=round(range(md[,input$tsneMDcol]),2)) - } - } - - output$tsneMD <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsneMD()) - } - }) - - output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsneMD()) - dev.off() - } - ) - - #### Metadata Scatterplot #### - output$scatterLog <- renderUI({ - if ((is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) | - (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY]))) { - checkboxGroupInput("scatterLog",inline=F,label=NULL, - choices=c("Log x axis"="x","Log y axis"="y","Show notch"="notch"), - selected="notch") - } else { - checkboxGroupInput("scatterLog",inline=F,label=NULL, - choices=c("Log x axis"="x","Log y axis"="y")) - } - }) - - plot_mdScatter <- function() { - if ((is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) & - (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY]))) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,"This figure is not designed to compare to categorical variables.") - } else if (is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) { - par(mar=c(3,3,2,1),mgp=2:0) - if (any(ci())) { - temp1 <- tapply(md[!ci(),input$mdScatterY],as.factor(md[!ci(),input$mdScatterX]),c) - temp2 <- tapply(md[ci(),input$mdScatterY],as.factor(md[ci(),input$mdScatterX]),c) - plot(x=NULL,y=NULL,ylim=range(md[,input$mdScatterY]), - xlim=c(0,length(levels(as.factor(md[,input$mdScatterX]))) * 3), - log=sub("notch","",paste(input$scatterLog,collapse="")),xaxt="n", - xlab=input$mdScatterX,ylab=input$mdScatterY) - boxplot(temp1,add=T,xaxt="n",notch="notch" %in% input$scatterLog, - at=seq(1,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3)) - boxplot(temp2,add=T,xaxt="n",notch="notch" %in% input$scatterLog,border="red", - at=seq(2,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3)) - axis(side=1,at=seq(1.5,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3), - labels=names(temp1)) - legend("top",bty="n",xpd=NA,inset=c(0,-.05),pch=0,col="red", - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } else { - boxplot(tapply(md[,input$mdScatterY],as.factor(md[,input$mdScatterX]),c), - xlab=input$mdScatterX,ylab=input$mdScatterY, - log=sub("notch","",paste(input$scatterLog,collapse="")), - notch="notch" %in% input$scatterLog) - } - } else if (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY])) { - par(mar=c(3,3,2,1),mgp=2:0) - if (any(ci())) { - temp1 <- tapply(md[!ci(),input$mdScatterX],as.factor(md[!ci(),input$mdScatterY]),c) - temp2 <- tapply(md[ci(),input$mdScatterX],as.factor(md[ci(),input$mdScatterY]),c) - plot(x=NULL,y=NULL,xlim=range(md[,input$mdScatterX]), - ylim=c(0,length(levels(as.factor(md[,input$mdScatterY]))) * 3), - log=sub("notch","",paste(input$scatterLog,collapse="")),yaxt="n", - xlab=input$mdScatterX,ylab=input$mdScatterY) - boxplot(temp1,add=T,horizontal=T,yaxt="n",notch="notch" %in% input$scatterLog, - at=seq(1,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3)) - boxplot(temp2,add=T,horizontal=T,yaxt="n",notch="notch" %in% input$scatterLog,border="red", - at=seq(2,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3)) - axis(side=2,at=seq(1.5,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3), - labels=names(temp1)) - legend("top",bty="n",xpd=NA,inset=c(0,-.05),pch=0,col="red", - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } else { - boxplot(tapply(md[,input$mdScatterX],as.factor(md[,input$mdScatterY]),c), - horizontal=T,xlab=input$mdScatterX,ylab=input$mdScatterY, - log=sub("notch","",paste(input$scatterLog,collapse="")), - notch="notch" %in% input$scatterLog) - } - } else { - layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5)) - par(mar=c(3,3,0,0),mgp=2:0,cex=1.1) - plot(md[!ci(),input$mdScatterX],md[!ci(),input$mdScatterY], - log=sub("notch","",paste(input$scatterLog,collapse="")), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab=input$mdScatterX,ylab=input$mdScatterY) - points(md[ci(),input$mdScatterX],md[ci(),input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2)) - if (any(ci())) { - legend("topleft",bty="n",pch=21,col="red",pt.bg=alpha("red",0.5), - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } - if ("x" %in% input$scatterLog) { tempLX <- "x" } else { tempLX <- "" } - if ("y" %in% input$scatterLog) { tempLY <- "y" } else { tempLY <- "" } - par(mar=c(0,3,1,0)) - boxplot(tapply(md[,input$mdScatterX],ci(),c),log=tempLX, - horizontal=T,xaxt="n",yaxt="n",border=c("black","red")) - par(mar=c(3,0,0,1)) - boxplot(tapply(md[,input$mdScatterY],ci(),c),log=tempLY, - horizontal=F,xaxt="n",yaxt="n",border=c("black","red")) - } - } - - output$mdScatter <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdScatter()) - } - }) - - output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdScatter()) - dev.off() - } - ) - - #### Metadata Factor Barplot #### - output$mdFactorOpts <- renderUI({ - if (is.factor(md[,input$mdFactorData]) | is.character(md[,input$mdFactorData])) { - radioButtons("mdFactorRA","Factor counts per cluster:",inline=T, - choices=list("Absolute"="absolute","Relative"="relative")) - } else { - checkboxGroupInput("mdFactorOpts",inline=T,label="Figure options", - choices=c("Log scale"="y","Show notch"="notch"),selected="notch") - } - }) - - plot_mdFactor <- function() { - if (is.factor(md[,input$mdFactorData]) | is.character(md[,input$mdFactorData])) { - id <- switch(input$mdFactorRA, - "relative"=tapply(md[,input$mdFactorData],clusts(), - function(X) table(X) / length(X)), - "absolute"=tapply(md[,input$mdFactorData],clusts(),table)) - if (is.list(id)) { id <- do.call(cbind,id) } - idylab <- switch(input$mdFactorRA, - "relative"="Proportion of cells per cluster", - "absolute"="Number of cells per cluster") - if (length(levels(md[,input$mdFactorData])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$mdFactorData])), - "Dark2")[1:length(levels(md[,input$mdFactorData]))] - } else { - idcol <- rainbow2(length(levels(md[,input$mdFactorData]))) - } - par(mar=c(3,3,2,1),mgp=2:0) - barplot(id,col=idcol,ylab=idylab, - legend.text=levels(md[,input$mdFactorData]), - args.legend=list(x="topright",horiz=T,inset=c(0,-.08),bty="n")) - mtext(input$mdFactorData,side=3,adj=0,font=2,line=1,cex=1.2) - } else { - par(mar=c(3,3,2,1),mgp=2:0) - boxplot(tapply(md[,input$mdFactorData],cl[,res()],c), - ylab=input$mdFactorData,notch="notch" %in% input$mdFactorOpts, - log=sub("notch","",paste(input$mdFactorOpts,collapse="")), - border=clustCols(res()),col=alpha(clustCols(res()),0.3)) - } - } - - output$mdFactor <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdFactor()) - } - }) - - output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdFactor()) - dev.off() - } - ) - - - ######## Differentially Expressed Genes per Cluster ######### - - output$heatDEtype <- renderUI({ - if (!is.null(res())) { - if (grepl("^Comp",res())) { - temp <- list("DE vs rest"="deTissue", - "Set A vs Set B"="deMarker") - } else { - temp <- list("DE vs rest"="deTissue", - "Marker genes"="deMarker", - "DE vs neighbour"="deNeighb") - } - radioButtons("heatG","Heapmap Genes:",choices=temp,selected="deMarker") - } - }) - - output$DEgeneSlider <- renderUI({ - if (length(res()) > 0) { - if (input$heatG == "deTissue") { - sliderInput("DEgeneCount",min=1,max=max(sapply(d$deTissue[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression of cluster over tissue", - "# of genes per cluster to show",sep="
"))) - } else if (input$heatG == "deMarker") { - if (grepl("^Comp",res())) { - temp_label <- HTML(paste( - "Positive differential gene expression between sets", - "# of genes per set to show",sep="
")) - } else { - temp_label <- HTML(paste( - "Positive differential gene expression between cluster and all other clusters", - "# of genes per cluster to show",sep="
")) - } - sliderInput("DEgeneCount",min=1,max=max(sapply(d$deMarker[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=temp_label) - } else if (input$heatG == "deNeighb") { - sliderInput("DEgeneCount",min=1,max=max(sapply(deNeighb[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression between cluster and nearest neighbour", - "# of genes per cluster to show",sep="
"))) - } - } - }) - - output$DEclustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("DEclustNum","Cluster # for gene list", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } - }) - - heatGenes <- reactive({ - temp <- unique(unlist(lapply( - switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]), - function(X) - if (nrow(X) == 0) { NA } else { rownames(X)[1:input$DEgeneCount] } - ))) - temp <- temp[!is.na(temp)] - return(temp) - }) - - clustMeans <- reactive({ #This only works if input is in ascending order of adjusted p value. - temp <- sapply(d$CGS[[res()]],function(X) X[heatGenes(),"MTC"]) - rownames(temp) <- heatGenes() - return(t(temp)) - }) - - hC <- reactive({ - if (exists("deDist")) { - if (res() %in% names(deDist)) { - return(hclust(as.dist(deDist[[res()]]),"single")) - } else { - return(hclust(dist(clustMeans()),"single")) - } - } else { - return(hclust(dist(clustMeans()),"single")) - } - }) - hG <- reactive(hclust(dist(t(clustMeans())),"complete")) - - sepClust <- reactive({ - if (hiC() == "") { - return(c(NA,NA)) - } else { - return(nrow(clustMeans()) - - c(which(levels(clusts())[hC()$order] == hiC()) - 1, - which(levels(clusts())[hC()$order] == hiC()))) - } - }) - - plot_heatmap <- function() { - if (length(levels(clusts())) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Heatmap cannot be computed", - "with less than two clusters.",sep="\n")) - } else if (length(heatGenes()) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,"There are no differentially expressed genes.") - } else { - if ("Unselected" %in% levels(clusts())) { - tempLabRow <- c(paste(levels(clusts())[!levels(clusts()) == "Unselected"], - paste(sapply(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": "),"Unselected") - } else { - tempLabRow <- paste(paste0("Cluster ",levels(clusts())), - paste(sapply(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": ") - } - heatmap.2(clustMeans(),Rowv=as.dendrogram(hC()),Colv=as.dendrogram(hG()),scale="column", - margins=c(9,12),lhei=c(2,10),lwid=c(1,11),trace="none", - keysize=1.5,density.info="none",key.par=list(mar=c(3,.5,2,.5),mgp=2:0), - cexCol=1 + 1/log2(nrow(clustMeans())),cexRow=1 + 1/log2(ncol(clustMeans())), - RowSideColors=clustCols(res()),labRow=tempLabRow, - rowsep=sepClust(),col=viridis(100,d=-1)) - } - } - - output$heatmap <- renderPlot({ - if (length(res()) > 0) { - print(plot_heatmap()) - } - }) - - output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_heatmap()) - dev.off() - } - ) - - output$deGeneSave <- downloadHandler( - filename=function() { paste0(input$heatG,"_",input$DEclustNum,".txt") }, - content=function(file) { - outTable <- switch(input$heatG, - deTissue=d$deTissue[[res()]][[input$DEclustNum]], - deMarker=d$deMarker[[res()]][[input$DEclustNum]], - deNeighb=deNeighb[[res()]][[input$DEclustNum]]) - write.table(outTable,file,quote=F,sep="\t",row.names=T,col.names=NA) - } - ) - - - #### Gene search box #### - output$geneSearchBox <- renderUI({ - if (input$searchType == "comma") { - textInput("GOI",width="100%", - label=paste("Enter list of genes,", - "(comma/space-separated, case-insensitive)", - "and click Search")) - } else if (input$searchType == "regex") { - textInput("GOI",value=demoRegex,width="100%", - label="Search for genes by regular expression and click Search") - } - }) - GOI <- eventReactive(input$GOIgo,{ - if (input$searchType == "comma") { - tempGeneList <- strsplit(input$GOI,split="[\\s,]",perl=T)[[1]] - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType == "regex") { - return(grep(input$GOI,rownames(nge),value=T,ignore.case=T)) - } - }, - ignoreNULL=F) - - - #### Gene expression in cluster #### - output$genePlotClustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("genePlotClust","Cluster:",selected=cSelected(), - choices=c("",levels(clusts())[!levels(clusts()) == "Unselected"])) - } - }) - - cellMarkCols <- reactive(rainbow2(length(cellMarkers))) - - plot_clusterGenes <- function() { - doubleDot <- function(col1,col2) { - upper.half.circle <- function(col1){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border=NA) - } - lower.half.circle <- function(col2){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0-cos(rs) - yc <- 0-sin(rs) - polygon(xc,yc,col=col2,border=NA) - } - upper.half.circle(col1) - lower.half.circle(col2) - rs <- seq(0,2*pi,len=200) - polygon(cos(rs),sin(rs),border="white") - } - singleDot <- function(col1){ - rs <- seq(0,2*pi,len=200) - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border="white") - } - par(mar=c(3,3,3,20),mgp=2:0) - if (hiC() == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above", - "or select a cluster from the drop-down list above left", - "to see gene expression for that cluster.",sep="\n")) - } else { - temp_ylab <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - plot(MDTC~DR, - data=d$CGS[[res()]][[hiC()]][ - !((d$CGS[[res()]][[hiC()]]$cMu | d$CGS[[res()]][[hiC()]]$cMs) & - d$CGS[[res()]][[hiC()]]$overCut),], - col=alpha("black",0.3), - xlab="Proportion of cells in which gene was detected", - ylab=paste("Mean normalized gene expression where detected",temp_ylab)) - title(paste0("Cluster ", hiC(),": ",d$clusterID[[res()]][hiC()]),cex=1.2) - mtext(paste("Cells:",sum(clusts()==hiC()), - " Genes detected:",length(d$CGS[[res()]][[hiC()]]$DR)),side=3,line=0,cex=0.9) - box(col=clustCols(res())[hiC()],lwd=2) - - if (input$cgLegend == "markers") { - for (x in which(d$CGS[[res()]][[hiC()]]$cMu)) { - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=singleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs)) { - temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))),"&")) - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=doubleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[as.integer(temp[1])], - col2=cellMarkCols()[as.integer(temp[2])])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMu & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))]) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[as.integer(temp[2])]) - } - legend(x=1.05,y=max(d$CGS[[res()]][[hiC()]]$MDTC),xpd=NA,bty="n",ncol=1, - pch=19,col=cellMarkCols(),legend=names(cellMarkersU)) - - } else if (input$cgLegend == "heatmap") { - degl <- rownames(d$CGS[[res()]][[hiC()]]) %in% - rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[hiC()]])[1:input$DEgeneCount] - if (any(degl)) { - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - temp_n <- nrow(switch(input$heatG, - deTissue=d$deTissue, - deMarker=d$deMarker, - deNeighb=deNeighb)[[res()]][[hiC()]]) - temp_lab <- switch(input$heatG, - deTissue=" DE genes vs rest of cells in sample", - deMarker=" marker genes", - deNeighb=" DE genes vs nearest neighbouring cluster") - legend("top",bty="n",pch=16,col="darkred", - legend=paste0(temp_n,temp_lab," (showing top ", - min(temp_n,input$DEgeneCount),")")) - } else if (input$cgLegend == "search" & length(GOI()) > 0) { - degl <- which(rownames(nge) %in% GOI()) - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - } - } - - output$clusterGenes <- renderPlot({ - if (length(res()) > 0) { - print(plot_clusterGenes()) - } - }) - - output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_clusterGenes()) - dev.off() - } - ) - - #### Gene expression comparison #### - clickGenes <- reactiveVal() - observeEvent(input$cgClick,{ - t <- nearPoints(d$CGS[[res()]][[hiC()]],input$cgClick,xvar="DR",yvar="MDTC") - clickGenes(t$genes) - }) - observeEvent(input$scatterClick,{ - t <- nearPoints(compDF(),input$scatterClick,xvar="x",yvar="y") - clickGenes(t$genes) - }) - - output$cgSelect <- renderUI({ - if (length(res()) > 0) { - if (input$boxplotGene == "click") { - selectInput("cgGene",choices=sort(clickGenes()),label="Select gene from list:") - } else if (input$boxplotGene == "search") { - selectInput("cgGene",choices=sort(GOI()),label="Select gene from list:") - } - } - }) - - plot_geneTest <- function() { - if (input$cgGene == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Select a gene by either clicking on the plot above", - "or searching for genes of interest in the search bar above,", - "then pick the gene from the list just above this figure", - "to see a comparison of that gene's expression across all clusters.",sep="\n")) - } else { - temp_ylab <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - temp_pos <- switch(as.character(length(levels(clusts())) > 1),"TRUE"=hC()$order,"FALSE"=1) - layout(matrix(2:1,nrow=2),heights=c(1,4)) - par(mar=c(3,3,0,3),mgp=2:0) - suppressWarnings(boxplot(vector("list",length(levels(clusts()))), - ylim=range(nge[input$cgGene,]), - ylab=paste(input$cgGene,"normalized gene expression",temp_ylab), - xlab=NA,xaxt="n")) - mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) - mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(get(egDB),keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), - side=1,line=2,font=2) - } - if ("sct" %in% input$bxpOpts) { - bxpCol <- alpha(clustCols(res()),.2) - } else { - bxpCol <- alpha(clustCols(res()),.8) - } - for (i in temp_pos) { - boxplot(nge[input$cgGene,clusts() == levels(clusts())[i]],add=T, - at=which(temp_pos == i),notch="notch" %in% input$bxpOpts,col=bxpCol[i],outline=F) - if ("sct" %in% input$bxpOpts) { - points(jitter(rep(which(temp_pos == i),sum(clusts() == levels(clusts())[i])),amount=.2), - nge[input$cgGene,clusts() == levels(clusts())[i]],pch=20,col=alpha(clustCols(res())[i],.4)) - } - } - if ("rnk" %in% input$bxpOpts) { - points(x=seq_along(d$CGS[[res()]]), - y=sapply(d$CGS[[res()]][temp_pos],function(X) X[input$cgGene,"MTCrank"]) * - max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - pch=25,cex=1.2,col="darkred",bg="firebrick2") - axis(side=4,at=seq(0,1,.25) * max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - labels=percent(seq(0,1,.25)),col.ticks="darkred",col.axis="darkred") - mtext(side=4,line=2,text="Quantile of gene expression per cluster",col="darkred") - } - if (length(temp_pos) > 1) { - par(new=F,mar=c(0,3,1,3)) - plot(as.dendrogram(hC()),leaflab="none") - } - } - } - - output$geneTest <- renderPlot({ - if (length(res()) > 0) { - print(plot_geneTest()) - } - }) - - output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_geneTest()) - dev.off() - } - ) - - - ######## Cluster comparison ######### - output$resSelect2 <- renderUI({ - selectInput("res2","Resolution:",choices=clustList(),selected=res(),width="100%") - }) - output$saveButton2 <- renderUI({ - if (grepl("^Comp",input$res2)) { - actionButton("updateForViz2","Save this comparison to disk",icon("save")) - } - }) - output$setScatterY <- renderUI({ - if ("Unselected" %in% levels(clusts())) { - selectInput("ssY",label="Cluster on Y-axis",selected="Set A", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } else { - selectInput("ssY",label="Cluster on Y-axis",choices=c("",levels(clusts())),selected=hiC()) - } - }) - output$setScatterX <- renderUI({ - if ("Unselected" %in% levels(clusts())) { - selectInput("ssX",label="Cluster on X-axis",selected="Set B", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } else { - selectInput("ssX",label="Cluster on X-axis",choices=c("",levels(clusts())), - selected=unique(gsub(pattern="^vs\\.|\\.[A-Za-z]+?$","", - colnames(deNeighb[[res()]][[hiC()]])))) - } - }) - output$diffLabelSelect <- renderUI({ - if (input$diffLabelType == "diff") { - sliderInput("diffCount",min=1,max=100,value=5,step=1,width="100%", - label="Number of genes to label") - } else if (input$diffLabelType == "de") { - if (input$heatG == "deTissue") { - sliderInput("diffCount",value=5,step=1,ticks=T,width="100%", - min=1,max=max(sapply(d$deTissue[[res()]][c(input$ssX,input$ssY)],nrow)), - label="DE vs rest: # of genes to label") - } else if (input$heatG == "deMarker") { - if (grepl("^Comp",res())) { - temp_label <- "Set A vs Set B: # of genes to label" - } else { - temp_label <- "Marker genes: # of genes to label" - } - sliderInput("diffCount", - min=1,max=max(sapply(d$deMarker[[res()]][c(input$ssX,input$ssY)],nrow)), - value=5,step=1,ticks=T,width="100%", - label=temp_label) - } else if (input$heatG == "deNeighb") { - sliderInput("diffCount",value=5,step=1,ticks=T,width="100%", - min=1,max=max(sapply(deNeighb[[res()]][c(input$ssX,input$ssY)],nrow)), - label="DE vs neighbour: # of genes to label") - } - } - }) - - compDF <- reactive({ - data.frame(y=d$CGS[[res()]][[input$ssY]][,input$scatterInput] - - d$CGS[[res()]][[input$ssX]][,input$scatterInput], - x=rowMeans(cbind(d$CGS[[res()]][[input$ssX]][,input$scatterInput], - d$CGS[[res()]][[input$ssY]][,input$scatterInput])), - genes=d$CGS[[res()]][[input$ssX]]$genes, - row.names=d$CGS[[res()]][[input$ssX]]$genes) - }) - - LBF <- reactive({ - lm(y~x,data=compDF()) - }) - - diffRanked <- reactive({ - if (input$scatterLine == "sub") { - temp <- d$CGS[[res()]][[input$ssY]][,input$scatterInput] - - d$CGS[[res()]][[input$ssX]][,input$scatterInput] - names(temp) <- rownames(d$CGS[[res()]][[input$ssY]]) - return(sort(temp,decreasing=T)) - } else if (input$scatterLine == "lbf") { - temp <- LBF()$residuals - names(temp) <- rownames(d$CGS[[res()]][[input$ssY]]) - return(sort(temp,decreasing=T)) - } - }) - - plot_setScatter <- function() { - if (!is.null(res())) { - if (input$ssX %in% levels(clusts()) & input$ssY %in% levels(clusts())) { - temp_exp <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - temp_label <- switch(input$scatterInput, - "MTC"=paste("mean normalized gene expression",temp_exp), - "MDTC"=paste("mean normalized gene expression where detected",temp_exp), - "DR"="proportion of cells in which gene was detected") - par(mar=c(3,3,2,1),mgp=2:0) - plot(y~x,data=compDF(), - ylab=paste0("Difference in ",temp_label," (",input$ssY," - ",input$ssX,")"), - xlab=paste0("Average of ",temp_label," between ",input$ssY," & ",input$ssX), - main=paste0("MA plot of ", - switch(input$scatterInput, - "MTC"="mean gene expression", - "MDTC"="mean detected gene expression", - "DR"="detection rate"), - " (",input$ssY," vs. ",input$ssX,")"), - pch=20,col=alpha("black",0.3)) - lines(x=c(par("usr")[1],par("usr")[2]),y=c(par("usr")[3],par("usr")[3]), - lwd=2,col=clustCols(res())[which(levels(clusts()) == input$ssX)],xpd=NA) - lines(x=c(par("usr")[1],par("usr")[2]),y=c(par("usr")[4],par("usr")[4]), - lwd=2,col=clustCols(res())[which(levels(clusts()) == input$ssY)],xpd=NA) - if (input$scatterLabelAngle) { - temp_srtX <- 315 - temp_srtY <- 45 - temp_adjX <- c(-0.15,0.5) - temp_adjY <- c(-0.15,0.5) - } else { - temp_srtX <- 45 - temp_srtY <- 315 - temp_adjX <- c(-0.15,0.5) - temp_adjY <- c(-0.15,0.5) - } - if (input$scatterLine == "sub") { - abline(h=0) - } else if (input$scatterLine == "lbf") { - abline(LBF()) - } - if (input$diffLabelType == "diff") { - temp_tY <- names(head(diffRanked(),input$diffCount)) - temp_tX <- names(tail(diffRanked(),input$diffCount)) - points(y~x,data=compDF()[temp_tY,], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssY)],0.8)) - text(compDF()[temp_tY,"x"],compDF()[temp_tY,"y"], - labels=temp_tY,srt=temp_srtY,adj=temp_adjY, - col=clustCols(res())[which(levels(clusts()) == input$ssY)],font=2) - points(y~x,data=compDF()[temp_tX,], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssX)],0.8)) - text(compDF()[temp_tX,"x"],compDF()[temp_tX,"y"], - labels=temp_tX,srt=temp_srtX,adj=temp_adjX, - col=clustCols(res())[which(levels(clusts()) == input$ssX)],font=2) - } else if (input$diffLabelType == "de") { - degX <- rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[input$ssX]])[1:input$diffCount] - if (length(degX) > 0) { - points(y~x,data=compDF()[degX,], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssX)],0.8)) - text(compDF()[degX,"x"],compDF()[degX,"y"],labels=degX,srt=temp_srtX,adj=temp_adjX, - col=clustCols(res())[which(levels(clusts()) == input$ssX)],font=2) - } - degY <- rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[input$ssY]])[1:input$diffCount] - if (length(degY) > 0) { - points(y~x,data=compDF()[degY,], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssY)],0.8)) - text(compDF()[degY,"x"],compDF()[degY,"y"], - labels=degY,srt=temp_srtY,adj=temp_adjY, - col=clustCols(res())[which(levels(clusts()) == input$ssY)],font=2) - } - } else if (input$diffLabelType == "search" & length(GOI()) > 0) { - points(y~x,data=compDF()[GOI()],pch=16,col=alpha("darkred",0.8)) - text(compDF()[GOI(),"x"],compDF()[GOI(),"y"], - labels=GOI(),srt=temp_srtX,adj=temp_adjX,col="darkred",font=2) - } - } - } - } - - output$setScatter <- renderPlot(print(plot_setScatter())) - - output$setScatterSave <- downloadHandler( - filename="setScatter.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_setScatter()) - dev.off() - } - ) - - output$setComparisonSave <- downloadHandler( - filename=function() { paste0(input$ssY,"vs",input$ssX,"_", - input$scatterInput,"_",input$scatterLine,".txt") }, - content=function(file) { - write.table(as.data.frame(diffRanked()),file,quote=F,sep="\t",row.names=T,col.names=F) - } - ) - - - ######## Custom sets for DE ######### - selectedSets <- reactiveValues(a=NULL,b=NULL) - - plot_tsne_selDE <- function() { - if (input$tsneSelDEcol == "") { - id <- rep(1,nrow(md)) - idcol <- "grey20" - } else if (is.factor(md[,input$tsneSelDEcol]) | is.character(md[,input$tsneSelDEcol])) { - id <- as.factor(md[,input$tsneSelDEcol]) - if (length(levels(md[,input$tsneSelDEcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneSelDEcol])), - "Dark2")[1:length(levels(md[,input$tsneSelDEcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneSelDEcol]))) - } - } else { - id <- cut(md[,input$tsneSelDEcol],100) - idcol <- viridis(100,d=-1) - } - par(mar=c(3,3,3,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - - if (input$tsneSelDEcol == "") { - } else if (is.factor(md[,input$tsneSelDEcol]) | is.character(md[,input$tsneSelDEcol])) { - legend("topleft",bty="n",horiz=T,xpd=NA,inset=c(0,-.09), - pch=21,col=idcol,pt.bg=alpha(idcol,0.5), - title=input$tsneSelDEcol,legend=levels(md[,input$tsneSelDEcol])) - } else { - legend("topleft",bty="n",horiz=T,xpd=NA,inset=c(0,-.09), - pch=21,col=viridis(3,d=-1),pt.bg=viridis(3,.5,d=-1), - title=input$tsneSelDEcol, - legend=c(round(min(md[,input$tsneSelDEcol]),2), - round((max(md[,input$tsneSelDEcol]) - - min(md[,input$tsneSelDEcol])) / 2,2), - round(max(md[,input$tsneSelDEcol]),2))) - } - - - points(dr_viz[selectedSets$a,],pch=19,col="#a50026") - points(dr_viz[selectedSets$b,],pch=19,col="#313695") - points(dr_viz[intersect(selectedSets$a,selectedSets$b),],pch=19,col="#ffffbf") - points(dr_viz[intersect(selectedSets$a,selectedSets$b),],pch=4,col="red") - - legend("topright",horiz=T,bty="n",xpd=NA,inset=c(0,-.09), - title="Selected Cells",legend=c("Set A","Set B","Both"), - pch=c(19,19,4),col=c("#a50026","#313695","red")) - } - output$tsneSelDE <- renderPlot({ print(plot_tsne_selDE()) }) - - - currSel <- reactive(rownames(brushedPoints(as.data.frame(dr_viz), - input$tsneBrush,xvar="tSNE_1",yvar="tSNE_2"))) - observeEvent(input$addCellsA,{ - selectedSets$a <- append(selectedSets$a,currSel()[!currSel() %in% selectedSets$a]) - }) - observeEvent(input$removeCellsA,{ - selectedSets$a <- selectedSets$a[!selectedSets$a %in% currSel()] - }) - observeEvent(input$addCellsB,{ - selectedSets$b <- append(selectedSets$b,currSel()[!currSel() %in% selectedSets$b]) - }) - observeEvent(input$removeCellsB,{ - selectedSets$b <- selectedSets$b[!selectedSets$b %in% currSel()] - }) - output$textSetA <- renderText(paste(length(selectedSets$a),"cells in Set A.")) - output$textSetB <- renderText(paste(length(selectedSets$b),"cells in Set B.")) - output$textOverlap <- renderText(paste(length(intersect(selectedSets$a,selectedSets$b)), - "cells in both sets.", - "Cells must be assigned to a single set prior to calculation.")) - - observeEvent(input$calcDE,{ - newRes <- paste0("Comp.",gsub("[^A-Za-z0-9]","",input$DEsetName)) - if (length(intersect(selectedSets$a,selectedSets$b)) > 0) { - output$calcText <- renderText("Sets can't overlap (please assign red cells to only one set).") - } else if (any(sapply(list(selectedSets$a,selectedSets$b),length) < 3)) { - output$calcText <- renderText("Each set must contain at least 3 cells.") - } else if (nchar(input$DEsetName) < 1) { - output$calcText <- renderText("Please name this comparison (in text box above).") - } else if (newRes %in% colnames(d$cl)) { - output$calcText <- renderText("This comparison name has already been used.") - } else { - output$calcText <- renderText("") - withProgress({ - temp <- rep("Unselected",nrow(d$cl)) - names(temp) <- rownames(d$cl) - temp[selectedSets$a] <- "Set A" - temp[selectedSets$b] <- "Set B" - d$cl[[newRes]] <- factor(temp) - - #### Gene stats per set #### - incProgress(amount=1/6,detail="Gene detection rate per set") - setCells <- d$cl[,newRes] != "Unselected" - DR <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) sum(Y>0)/length(Y))) - - incProgress(amount=1/6,detail="Mean detected gene expression per set") - MDTC <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) { - temp <- meanLogX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - - incProgress(amount=1/6,detail="Mean gene expression per set") - MTC <- apply(nge,1,function(X) - tapply(X,d$cl[,newRes],meanLogX)) - - d$CGS[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - d$CGS[[newRes]][["Unselected"]] <- data.frame(MTC=MTC["Unselected",]) - for (i in names(d$CGS[[newRes]])) { - d$CGS[[newRes]][[i]]$MTCrank <- rank(d$CGS[[newRes]][[i]]$MTC, - ties.method="min")/nrow(d$CGS[[newRes]][[i]]) - if (i == "Unselected") { next } - d$CGS[[newRes]][[i]]$cMu <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersU) - d$CGS[[newRes]][[i]]$cMs <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersS) - d$CGS[[newRes]][[i]]$overCut <- d$CGS[[newRes]][[i]]$MTC > mean(d$CGS[[newRes]][[i]]$MTC) - d$CGS[[newRes]][[i]]$genes <- rownames(d$CGS[[newRes]][[i]]) - } - if (length(cellMarkers) < 1) { - d$clusterID[[newRes]] <- sapply(d$CGS[[newRes]],function(Z) return("")) - } else if (!any(unlist(cellMarkers) %in% rownames(nge))) { - warning(paste("None of the provided cellMarkers are found in the data", - "(check your gene IDs against rownames in your data).")) - d$clusterID[[newRes]] <- sapply(d$CGS[[newRes]],function(Z) return("")) - } else { - d$clusterID[[newRes]] <- c(names(cellMarkers)[sapply(d$CGS[[newRes]][1:2],function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))], - "Unselected") - names(d$clusterID[[newRes]]) <- names(d$CGS[[newRes]]) - } - - #### deTissue - DE per cluster vs all other data #### - incProgress(amount=1/6,detail="DE vs tissue logGER calculations") - deT_logGER <- sapply(levels(d$cl[,newRes])[1:2],function(i) - MTC[i,] - apply(nge[,d$cl[,newRes] != i],1,meanLogX)) - deT_genesUsed <- apply(deT_logGER,2,function(X) which(X > logGERthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logGERthresh should be set to less than ", - min(apply(deT_logGER,2,function(X) max(abs(X)))), - ", the largest magnitude logGER between cluster ", - names(which.min(apply(deT_logGER,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - incProgress(amount=1/6,detail="DE vs tissue Wilcoxon rank sum calculations") - deT_pVal <- sapply(levels(d$cl[,newRes])[1:2],function(i) - apply(nge[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[d$cl[,newRes] == i],X[d$cl[,newRes] != i])$p.value),simplify=F) - d$deTissue[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(i) - data.frame(logGER=deT_logGER[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply( - p.adjust(do.call(rbind,d$deTissue[[newRes]])$pVal,"fdr"), - rep(names(sapply(d$deTissue[[newRes]],nrow)),sapply(d$deTissue[[newRes]],nrow)), - c) - for (i in names(d$deTissue[[newRes]])) { - d$deTissue[[newRes]][[i]] <- d$deTissue[[newRes]][[i]][tempQval[[i]] <= WRSTalpha,] - d$deTissue[[newRes]][[i]]$qVal <- tempQval[[i]][tempQval[[i]] <= WRSTalpha] - } - - #### deMarker - DE per cluster vs each other cluster #### - incProgress(amount=1/6,detail="Calculating Set A vs Set B") - - deM_dDR <- DR["Set A",] - DR["Set B",] - deM_logGER <- MTC["Set A",] - MTC["Set B",] - deM_genesUsed <- switch(threshType, - dDR=which(abs(deM_dDR) > dDRthresh), - logGER=which(abs(deM_logGER) > logGERthresh)) - if (length(deM_genesUsed) < 1) { - stop("Gene filtering threshold is set too high.") - } - - deM_pVal <- apply(nge[deM_genesUsed,],1,function(X) - wilcox.test(X[d$cl[,newRes] == "Set A"], - X[d$cl[,newRes] == "Set B"])$p.value) - - temp_deVS <- data.frame(dDR=deM_dDR[deM_genesUsed], - logGER=deM_logGER[deM_genesUsed], - pVal=deM_pVal)[order(deM_pVal),] - temp_deVS$qVal <- p.adjust(temp_deVS$pVal,"fdr") - - d$deMarker[[newRes]] <- list( - "Set A"=temp_deVS[temp_deVS[,threshType] > 0 & temp_deVS$qVal <= WRSTalpha,], - "Set B"=temp_deVS[temp_deVS[,threshType] < 0 & temp_deVS$qVal <= WRSTalpha,] - ) - d$deMarker[[newRes]][["Set B"]]$dDR <- d$deMarker[[newRes]][["Set B"]]$dDR * -1 - d$deMarker[[newRes]][["Set B"]]$logGER <- d$deMarker[[newRes]][["Set B"]]$logGER * -1 - - selectedSets$a <- selectedSets$b <- NULL - },message="DE calculations:") - - res(newRes) # Automatically update the view to show the calculated results. - } - }) - observeEvent(input$updateForViz, { - withProgress({ - new_cl <- d$cl[input$res] - new_CGS <- list() - for (i in names(d$CGS[[input$res]])) { - new_CGS[[input$res]][[i]] <- - d$CGS[[input$res]][[i]][colnames(d$CGS[[input$res]][[i]]) %in% c("DR","MDTC","MTC")] - } - new_deTissue <- d$deTissue[input$res] - new_deMarker <- d$deMarker[input$res] - incProgress(.5) - save(new_cl,new_CGS,new_deTissue,new_deMarker, - file=paste0(dataPath,dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData")) - },message=paste0( - "Saving ",dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData to ",dataPath)) - }) - observeEvent(input$updateForViz2, { - withProgress({ - new_cl <- d$cl[input$res] - new_CGS <- list() - for (i in names(d$CGS[[input$res]])) { - new_CGS[[input$res]][[i]] <- - d$CGS[[input$res]][[i]][colnames(d$CGS[[input$res]][[i]]) %in% c("DR","MDTC","MTC")] - } - new_deTissue <- d$deTissue[input$res] - new_deMarker <- d$deMarker[input$res] - incProgress(.5) - save(new_cl,new_CGS,new_deTissue,new_deMarker, - file=paste0(dataPath,dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData")) - },message=paste0( - "Saving ",dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData to ",dataPath)) - }) - - - ######## Distribution of genes of interest ######### - output$geneSearchBox1 <- renderUI({ - if (input$searchType1 == "comma") { - textInput("GOI1",width="100%", - label=paste("Enter list of genes")) - } else if (input$searchType1 == "regex") { - textInput("GOI1",value=demoRegex,width="100%", - label="Enter regular expression") - } - }) - - GOI1 <- eventReactive(input$GOI1go,{ - if (input$searchType1 == "comma") { - tempGeneList <- "" - try({ - tempGeneList <- strsplit(input$GOI1,split="[\\s,]",perl=T)[[1]] - },silent=T) - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType1 == "regex") { - return(grep(input$GOI1,rownames(nge),value=T,ignore.case=T)) - } - },ignoreNULL=F) - - output$GOI1select <- renderUI({ - selectInput("goi1",label="Select genes:",choices=sort(GOI1()),multiple=T) - }) - - output$geneSearchBox2 <- renderUI({ - if (input$searchType2 == "comma") { - textInput("GOI2",width="100%", - label=paste("Search by list of genes")) - } else if (input$searchType2 == "regex") { - textInput("GOI2",value=demoRegex,width="100%", - label="Search by regular expression") - } - }) - - GOI2 <- eventReactive(input$GOI2go,{ - if (input$searchType2 == "comma") { - tempGeneList <- "" - try({ - tempGeneList <- strsplit(input$GOI2,split="[\\s,]",perl=T)[[1]] - },silent=T) - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType2 == "regex") { - return(grep(input$GOI2,rownames(nge),value=T,ignore.case=T)) - } - },ignoreNULL=F) - - output$GOI2select <- renderUI({ - selectInput("goi2",label="Select genes:",choices=sort(GOI2()),multiple=T) - }) - - plot_tsneClust <- function() { - par(mar=c(3,3,4,1),mgp=2:0) - plot(dr_viz,pch=21, - col=alpha(clustCols(res())[clusts()],1), - bg=alpha(clustCols(res())[clusts()],0.5), - xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs")) - } - - plot_goi <- function(goi) { - if (length(goi) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("To search for your gene(s) of interest type a", - "list of genes or regex in the box above", - "then select the gene(s) from the drop-down list", - "in the \"Select genes:\" box above right.",sep="\n")) - } else { - if (length(goi) > 5) { goiL <- 5 } else { goiL <- length(goi) } - if (goiL > 1) { - gv <- apply(nge[goi,],2,max) - } else { - gv <- nge[goi,] - } - cv <- cut(gv,breaks=100,labels=F) - par(mar=c(3,3,goiL+1,1),mgp=2:0) - plot(dr_viz,pch=21,cex=1.3,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(100,.7,d=-1)[cv],bg=viridis(100,.3,d=-1)[cv]) - temp_yrange <- max(dr_viz[,2]) - min(dr_viz[,2]) - segments(x0=seq(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.95),length.out=1000), - y0=max(dr_viz[,2]) + temp_yrange * .045, - y1=max(dr_viz[,2]) + temp_yrange * .065, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.75), - quantile(range(dr_viz[,1]),.95)), - y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), - labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- - select(get(egDB),keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - if (length(tempGeneName) > 4) { - tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] - } - title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) - } - } - } - - output$goiPlot1 <- renderPlot({ - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - output$goiPlot2 <- renderPlot({ - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - -} -########## ShinyApp ########## -shinyApp(ui = ui, server = server) \ No newline at end of file diff --git a/ToBeConvertedToPkg/liver/HumanLiver_deVS.RData b/ToBeConvertedToPkg/liver/HumanLiver_deVS.RData deleted file mode 100644 index d87863c..0000000 Binary files a/ToBeConvertedToPkg/liver/HumanLiver_deVS.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/liver/HumanLiver_forViz.RData b/ToBeConvertedToPkg/liver/HumanLiver_forViz.RData deleted file mode 100644 index 435599d..0000000 Binary files a/ToBeConvertedToPkg/liver/HumanLiver_forViz.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/liver/HumanLiver_savedRes.RData b/ToBeConvertedToPkg/liver/HumanLiver_savedRes.RData deleted file mode 100644 index cf63991..0000000 Binary files a/ToBeConvertedToPkg/liver/HumanLiver_savedRes.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/liver/intro.md b/ToBeConvertedToPkg/liver/intro.md deleted file mode 100644 index e50b001..0000000 --- a/ToBeConvertedToPkg/liver/intro.md +++ /dev/null @@ -1 +0,0 @@ -HumanLiver: You can add to this preamble by editting liver/intro.md diff --git a/ToBeConvertedToPkg/meCortex/e11/app.R b/ToBeConvertedToPkg/meCortex/e11/app.R deleted file mode 100644 index 4ce23b0..0000000 --- a/ToBeConvertedToPkg/meCortex/e11/app.R +++ /dev/null @@ -1,1294 +0,0 @@ -######## User-defined variables ######## - -dataPath <- "e11_Cortical_Only_forViz.RData" -## ^ Point this to the output file from PrepareInputs.R -## If you set a default resolution in the Shiny app, it will save to the same directory. - -vizScriptPath <- "./" -## ^ Point this to the directory in which the "app.R" Shiny script resides - -species <- "mouse" -## ^ Set species ("mouse"/"human"). -## If other, add the annotation database from Bioconductor to the egDB <- switch() expression below. - - -#### List known cell-type markers #### -cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna","Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b","Tle4", - "Nes","Cux1","Cux2","Tubb3","Rbfox3","Dcx")) -#cellMarkers <- list() -## ^ If you have canonical marker genes for expected cell types, list them here -## (see example above from mouse embryonic cortex). The Shiny app will attempt -## to label clusters in the tSNE projection by highest median gene expression. -## Otherwise leave the list blank (uncomment line above). - - -#### Variables for differential expression analysis #### -exponent <- 2 -## ^ log base of your normalized input data. -## Seurat defaults to natural log (set this to exp(1)), -## other methods are generally log2 (set this to 2). -pseudocount <- 1 -## ^ pseudocount added to all log-normalized values in your input data. -## Most methods use a pseudocount of 1 to eliminate log(0) errors. - -#threshType <- "logFC" # use a fold-change-based threshold for filtering genes prior to DE testing -threshType <- "dDR" # use a difference in detection rate threshold for filtering -## Filtering genes for use in differential expression testing can be done multiple ways. -## We use a fold-change filter for comparing each cluster to the tissue as a whole, but find that -## difference in detection rates works better when comparing clusters to each other. You can set -## threshType to "logFC" to use fold-change for all gene filtering if you'd prefer. - -logFCthresh <- 1 # magnitude of mean log-expression fold change between clusters to use as filter. -dDRthresh <- 0.15 # magnitude of detection rate difference between clusters to use as filter. -WRSTalpha <- 0.01 # significance level for DE testing using Wilcoxon rank sum test - - -######################################## - - - -######## Code to run the Shiny app ######## -library(markdown) -library(shiny) -library(cluster) -library(gplots) -library(scales) -library(viridis) -library(RColorBrewer) -library(TeachingDemos) - -library(org.Mm.eg.db) -egDB <- "org.Mm.eg.db" - -mean.logX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -if (length(cellMarkers) < 1) { - cellMarkersS <- cellMarkersU <- list() -} else { - cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2,function(X) do.call(intersect,unname(cellMarkers[X]))) - try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2,function(X) paste(X,collapse="&")),silent=T) - cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] - cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) -} - -demoRegex <- switch(species,mouse="^Actb$",human="^ACTB$") - -load(dataPath) -temp_dataPath <- strsplit(dataPath,"/|\\\\") -dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",dataPath) -if (dataPath == "") { dataPath <- "./" } -dataTitle <- sub("\\..+$|_forViz\\..+$","",temp_dataPath[[1]][length(temp_dataPath[[1]])]) -rm(temp_dataPath) - -if (file.exists(paste0(dataPath,dataTitle,"_savedRes.RData"))) { - load(paste0(dataPath,dataTitle,"_savedRes.RData")) -} else { - savedRes <- NULL -} - -if (!file.exists(paste0(dataPath,"intro.md"))) { - write(paste0(dataTitle,": You can add to this preamble by editting ",dataPath,"intro.md"), - file=paste0(dataPath,"intro.md")) -} - -silDist <- dist(dr_clust,method="euclidean") -## ^ precalculating distances in reduced dimensionality space for the silhouette plot. - -for (l in names(CGS)) { - for (i in names(CGS[[l]])) { - CGS[[l]][[i]]$MTCrank <- rank(CGS[[l]][[i]]$MTC,ties.method="min")/nrow(CGS[[l]][[i]]) - CGS[[l]][[i]]$cMu <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersU) - CGS[[l]][[i]]$cMs <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersS) - CGS[[l]][[i]]$overCut <- CGS[[l]][[i]]$MTC > mean(CGS[[l]][[i]]$MTC) - CGS[[l]][[i]]$genes <- rownames(CGS[[l]][[i]]) - } -} - -if (length(cellMarkers) < 1) { - clusterID <- sapply(colnames(cl),function(X) rep("",nrow(cl)),simplify=F) -} else { - clusterID <- sapply(CGS,function(Z) { - temp <- names(cellMarkers)[sapply(Z,function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))] - names(temp) <- names(Z) - return(temp) - },simplify=F) -} - -#### Run the Shiny App! #### - - -########## UI ########## -ui <- fixedPage( - fixedRow( - titlePanel(paste("scClustViz -",dataTitle)), - includeMarkdown(paste0(dataPath,"intro.md")) - ), - hr(), - - ######## Cluster Resolution Selection ######## - fixedRow( - titlePanel("Cluster Resolution Selection"), - column(6, - fixedRow(column(6,uiOutput("resSelect"),align="left"), - column(6,align="right", - actionButton("go","View clusters at this resolution",icon("play")), - actionButton("save","Save this resolution as default",icon("bookmark")))), - radioButtons("deType",NULL,list("# of DE genes to nearest neighbouring cluster"="deNeighb", - "# of marker genes per cluster"="deMarker"),inline=T), - plotOutput("cqPlot",height="500px")), - column(6,plotOutput("sil",height="600px")) - ), - fixedRow( - column(6,downloadButton("cqPlotSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") - ), - hr(), - - ######## Cell-type Clusters ######## - fixedRow(titlePanel("Cell-type Clusters")), - fixedRow( - column(6, - if (length(cellMarkers) > 0) { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn","Cluster annotations"="ca")) - } else { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn")) - }, - strong("Click point on plot below to select cluster")), - column(6,selectInput("tsneMDcol","Metadata:",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])) - ), - fixedRow( - column(6,plotOutput("tsne",height="580px",click="tsneClick")), - column(6,plotOutput("tsneMD",height="580px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("tsneMDSave","Save as PDF")) - ), - hr(), - - fixedRow( - column(3,selectInput( - "mdScatterX","x axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_counts"),align="left"), - column(3,selectInput( - "mdScatterY","y axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_features"),align="left"), - column(3,selectInput( - "mdFactorData","Metadata (factor):", - choices=colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - selected=grep("phase", - colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - value=T,ignore.case=T)[1])), - column(3,radioButtons("mdFactorRA","Factor counts per cluster:",inline=T, - choices=list("Absolute"="absolute","Relative"="relative"))) - ), - fixedRow( - column(6,plotOutput("mdScatter",height="560px")), - column(6,plotOutput("mdFactor",height="560px")) - ), - fixedRow( - column(6,align="left",downloadButton("mdScatterSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) - ), - hr(), - - ######## Cluster-wise Gene Stats ######### - fixedRow(titlePanel("Cluster-wise Gene Stats")), - fixedRow( - column(2,uiOutput("heatDEtype")), - column(2,uiOutput("DEclustSelect")), - column(2,downloadButton("deGeneSave","Download gene list"), - downloadButton("heatmapSave","Save as PDF"),align="right"), - column(6,uiOutput("DEgeneSlider")) - ), - fixedRow(plotOutput("heatmap",height="600px")), - hr(), - - fixedRow( - column(2,uiOutput("genePlotClustSelect")), - column(6,if (length(cellMarkers) > 0) { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Cell-type markers"="markers", - "Top DE genes (from heatmap)"="heatmap", - "Gene symbols (regex)"="regex")) - } else { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Top DE genes (from heatmap)"="heatmap", - "Gene symbols (regex)"="regex")) - }), - column(3,align="right",textInput("GOI","Gene symbols (regex)",demoRegex)), - column(1,actionButton("GOIgo","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOIgo { margin-top: 25px; }"), - fixedRow( - plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") - ), - hr(), - - fixedRow( - column(4,uiOutput("cgSelect")), - column(8, - radioButtons("boxplotGene",inline=T,label="Gene of interest:", - choices=c("Click from plot above"="click", - "From gene symbols (regex entry)"="regex")), - checkboxGroupInput("bxpOpts",label=NULL,selected=c("sct","rnk"),inline=T, - choices=list("Include scatterplot"="sct", - "Include gene rank"="rnk"))) - ), - fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") - ), - hr(), - - ######## Distribution of genes of interest ######### - fixedRow(titlePanel("Distribution of Genes of Interest")), - fixedRow( - column(4, - fixedRow( - radioButtons("plotClust1",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel1",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI1",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI1go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI1go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI1select")), - column(4, - fixedRow( - radioButtons("plotClust2",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel2",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI2",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI2go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI2go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI2select")) - ), - fixedRow( - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")), - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")) - ), - fixedRow( - column(6,plotOutput("goiPlot1",height="600px")), - column(6,plotOutput("goiPlot2",height="600px")) - ), - fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) - ), - hr(), - - ######## Custom sets for DE ######### - fixedRow( - column(6,plotOutput("tsneSelDE",brush="tsneBrush",height="580px")), - column(6, - actionButton("addCellsA","Set A: Add Cells",icon("plus")), - actionButton("removeCellsA","Set A: Remove Cells",icon("minus")), - hr(), - actionButton("addCellsB","Set B: Add Cells",icon("plus")), - actionButton("removeCellsB","Set B: Remove Cells",icon("minus")), - hr(), - textInput("DEsetName","Short name for this comparison:", - placeholder="A-z0-9 only please"), - actionButton("calcDE","Calculate DE and Save",icon("play")), - hr(), - textOutput("calcText") - ) - ), - h1() -) - - -########## Server ########## -server <- function(input,output,session) { - d <- reactiveValues(cl=cl,CGS=CGS, - clusterID=clusterID, - deTissue=deTissue, - deMarker=deMarker) - - clustCols <- reactive({ - if (grepl("^Comp",input$res)) { - c(brewer.pal(3,"PRGn")[c(1,3)],"grey80") - } else if (length(levels(d$cl[,input$res])) <= 8) { - brewer.pal(length(levels(d$cl[,input$res])),"Dark2")[1:length(levels(d$cl[,input$res]))] - } else { - rainbow2(length(levels(d$cl[,input$res]))) - } - }) - - - ######## Cluster Resolution Selection ######## - #### Inter-cluster DE boxplots #### - numClust <- sapply(cl,function(X) length(levels(X))) - clustList <- reactive({ - temp <- as.list(colnames(d$cl)) - names(temp)[seq_along(numClust)] <- paste0(unlist(temp)[seq_along(numClust)], - ": ",numClust," clusters") - if (length(temp) > length(numClust)) { - names(temp)[setdiff(seq_along(temp),seq_along(numClust))] <- - paste0("Comparison: ", - sub("Comp.","",fixed=T, - x=unlist(temp)[setdiff(seq_along(temp),seq_along(numClust))])) - } - return(temp) - }) - output$resSelect <- renderUI({ - selectInput("res","Resolution:",choices=clustList(),selected=savedRes) - }) - numClust <- numClust[numClust > 1] - - plot_cqPlot <- function() { - numDEgenes <- lapply(get(input$deType),function(X) sapply(X,nrow)) - toplim <- c(21,max(unlist(numDEgenes)) + 20) - botlim <- c(-1,21) - - if (grepl("^Comp",input$res)) { - par(mar=c(3,3.5,1,1)) - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Press 'View clusters at this resolution'", - "to view the comparison", - sub("Comp.","",input$res,fixed=T),sep="\n")) - } else { - par(mar=c(0.2,3.5,1,1),mgp=2:0,mfrow=c(2,1)) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=toplim,yaxs="i",xaxt="n",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - - par(mar=c(3,3.5,0.2,1),mgp=2:0) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=botlim,yaxs="i",xlab="Number of clusters",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - mtext(switch(input$deType, - "deMarker"="Positive DE genes per cluster to all other clusters", - "deNeighb"="Positive DE genes per cluster to nearest cluster") - ,side=2,line=2.5,at=botlim[2],xpd=NA) - } - } - - output$cqPlot <- renderPlot({ - print(plot_cqPlot()) - }) - - output$cqPlotSave <- downloadHandler( - filename="cqPlot.pdf", - content=function(file) { - pdf(file,width=7,height=6) - print(plot_cqPlot()) - dev.off() - } - ) - - #### Silhouette plot #### - plot_sil <- function() { - tempSil <- silhouette(as.integer(d$cl[,input$res]),dist=silDist) - par(mar=c(4.5,.5,1.5,1.5),mgp=2:0) - if (length(tempSil) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Silhouette plot cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - plot(tempSil,beside=T,border=NA,main=NA,col=clustCols(),do.n.k=T) - } - } - - output$sil <- renderPlot({ - print(plot_sil()) - }) - - output$silSave <- downloadHandler( - filename="sil.pdf", - content=function(file) { - pdf(file,width=6,height=7) - print(plot_sil()) - dev.off() - } - ) - - #### res buttons #### - res <- eventReactive(input$go,input$res,ignoreNULL=F) - - observeEvent(input$save,{ - savedRes <<- input$res #<<- updates variable outside scope of function (ie. global environment) - save(savedRes,file=paste0(dataPath,dataTitle,"_savedRes.RData")) - }) - - - ######## Cell-type Clusters ######## - clusts <- reactive(d$cl[,res()]) - - #### Cell-type tSNE #### - plot_tsne_labels <- function() { - if (input$tsneLabels == "ca") { - temp_labelNames <- sapply(unique(d$clusterID[[res()]]),function(X) - names(which(d$clusterID[[res()]] == X)),simplify=F) - temp_labels <- apply(dr_viz,2,function(Y) - tapply(Y,apply(sapply(temp_labelNames,function(X) clusts() %in% X),1,which),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=names(temp_labelNames),font=2,cex=1.5) - } else if (input$tsneLabels == "cn") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=levels(clusts()),font=2,cex=1.5) - } else { - legend("center",legend="You changed the label choice names...") - } - } - - plot_tsne <- function() { - par(mar=c(3,3,4,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs"), - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(clustCols()[clusts()],0.2)[!ci()], - bg=alpha(clustCols()[clusts()],0.1)[!ci()]) - points(dr_viz[ci(),],pch=21, - col=alpha(clustCols()[clusts()],1)[ci()], - bg=alpha(clustCols()[clusts()],0.5)[ci()]) - } else { - points(dr_viz,pch=21, - col=alpha(clustCols()[clusts()],1), - bg=alpha(clustCols()[clusts()],0.5)) - } - if (hiC() != "") { - mtext(side=3,line=-1,text=paste("Cluster",hiC(),"-", - d$clusterID[[res()]][hiC()],"-", - sum(clusts() == hiC()),"cells")) - } - } - - output$tsne <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsne()) - print(plot_tsne_labels()) - } - }) - - output$tsneSave <- downloadHandler( - filename="tsne.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsne()) - print(plot_tsne_labels()) - dev.off() - } - ) - - #### clusterSelect #### - - - clusterSelect <- reactiveValues(cl=NULL) - - observeEvent(input$tsneClick,{ clusterSelect$cl <- input$tsneClick }) - - cSelected <- reactive({ - t <- nearPoints(as.data.frame(dr_viz),clusterSelect$cl,xvar="tSNE_1",yvar="tSNE_2",threshold=5) - t2 <- d$cl[rownames(t)[1],res()] - if (is.na(t2)) { - return("") - } else if (t2 == "Unselected") { - return("") - } else { - return(t2) - } - }) - - hiC <- reactive({ - if (length(res()) < 1) { - return("") - } else if (input$genePlotClust != "") { - d$cl[which(d$cl[,res()] == input$genePlotClust)[1],res()] - } else { - return(input$genePlotClust) - } - }) - - ci <- reactive({ - if (hiC() == "") { - rep(F,length(clusts())) - } else { - clusts() == hiC() - } - }) - - #### Metadata tSNE overlay #### - plot_tsneMD <- function() { - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - id <- as.factor(md[,input$tsneMDcol]) - if (length(levels(md[,input$tsneMDcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneMDcol])), - "Dark2")[1:length(levels(md[,input$tsneMDcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneMDcol]))) - } - } else { - id <- cut(md[,input$tsneMDcol],100) - idcol <- viridis(100,d=-1) - } - layout(cbind(2:1),heights=c(1,9)) - par(mar=c(3,3,0,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(idcol,.1)[id[!ci()]], - bg=alpha(idcol,0.05)[id[!ci()]]) - points(dr_viz[ci(),],pch=21, - col=alpha(idcol,.8)[id[ci()]], - bg=alpha(idcol,0.4)[id[ci()]]) - } else { - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - } - plot_tsne_labels() - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - par(mar=c(0,0,0,0)) - plot.new() - legend("bottom",bty="n",horiz=T,pch=c(NA,rep(21,length(levels(md[,input$tsneMDcol])))), - legend=c(paste0(input$tsneMDcol,":"),levels(md[,input$tsneMDcol])), - col=c(NA,idcol),pt.bg=c(NA,alpha(idcol,0.5))) - } else { - par(mar=c(0,5,3,3)) - barplot(rep(1,100),space=0,col=idcol,xaxt="n",yaxt="n",border=NA,main=input$tsneMDcol) - text(x=c(1,100),y=1,pos=c(2,4),xpd=NA,labels=round(range(md[,input$tsneMDcol]),2)) - } - } - - output$tsneMD <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsneMD()) - } - }) - - output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsneMD()) - dev.off() - } - ) - - #### Metadata Factor Barplot #### - plot_mdFactor <- function() { - id <- switch(input$mdFactorRA, - "relative"=tapply(md[,input$mdFactorData],clusts(), - function(X) table(X) / length(X)), - "absolute"=tapply(md[,input$mdFactorData],clusts(),table)) - if (is.list(id)) { id <- do.call(cbind,id) } - idylab <- switch(input$mdFactorRA, - "relative"="Proportion of cells per cluster", - "absolute"="Number of cells per cluster") - if (length(levels(md[,input$mdFactorData])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$mdFactorData])), - "Dark2")[1:length(levels(md[,input$mdFactorData]))] - } else { - idcol <- rainbow2(length(levels(md[,input$mdFactorData]))) - } - par(mar=c(3,3,4,1),mgp=2:0) - barplot(id,col=idcol,ylab=idylab, - legend.text=levels(md[,input$mdFactorData]), - args.legend=list(x="topright",horiz=T,inset=c(0,-.08),bty="n")) - mtext(input$mdFactorData,side=3,adj=0,font=2,line=1,cex=1.2) - } - - output$mdFactor <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdFactor()) - } - }) - - output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdFactor()) - dev.off() - } - ) - - #### Metadata Scatterplot #### - plot_mdScatter <- function() { - layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5)) - par(mar=c(3,3,0,0),mgp=2:0,cex=1.1) - if (all(ci())) { - plot(md[,input$mdScatterX],md[,input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2), - xlab=input$mdScatterX,ylab=input$mdScatterY) - } else { - plot(md[!ci(),input$mdScatterX],md[!ci(),input$mdScatterY], - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab=input$mdScatterX,ylab=input$mdScatterY) - points(md[ci(),input$mdScatterX],md[ci(),input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2)) - } - if (any(ci())) { - legend("topleft",bty="n",pch=21,col="red",pt.bg=alpha("red",0.5), - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } - par(mar=c(0,3,1,0)) - boxplot(tapply(md[,input$mdScatterX],ci(),c), - horizontal=T,xaxt="n",yaxt="n",border=c("black","red")) - par(mar=c(3,0,0,1)) - boxplot(tapply(md[,input$mdScatterY],ci(),c), - horizontal=F,xaxt="n",yaxt="n",border=c("black","red")) - } - - output$mdScatter <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdScatter()) - } - }) - - output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdScatter()) - dev.off() - } - ) - - - ######## Cluster-wise Gene Stats ######### - - #### Heatmap genes #### - output$heatDEtype <- renderUI({ - if (grepl("^Comp",input$res)) { - temp <- list("DE vs tissue average"="deTissue", - "Set A vs Set B"="deMarker") - } else { - temp <- list("DE vs tissue average"="deTissue", - "Marker genes"="deMarker", - "DE vs neighbour"="deNeighb") - } - radioButtons("heatG","Heapmap Genes:",choices=temp) - }) - - output$DEgeneSlider <- renderUI({ - if (length(res()) > 0) { - switch( - input$heatG, - deTissue= - sliderInput("DEgeneCount",min=2,max=max(sapply(d$deTissue[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression of cluster over tissue", - "# of genes per cluster to show",sep="
" - ))), - deMarker= - sliderInput("DEgeneCount",min=2,max=max(sapply(d$deMarker[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression between cluster and all other clusters", - "# of genes per cluster to show",sep="
" - ))), - deNeighb= - sliderInput("DEgeneCount",min=2,max=max(sapply(deNeighb[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression between cluster and nearest neighbour", - "# of genes per cluster to show",sep="
" - )))) - } - }) - - output$DEclustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("DEclustNum","Cluster # for gene list", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } - }) - - heatGenes <- reactive({ - temp <- unique(unlist(lapply( - switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]), - function(X) - if (nrow(X) == 0) { NA } else { rownames(X)[1:input$DEgeneCount] } - ))) - temp <- temp[!is.na(temp)] - return(temp) - }) - - clustMeans <- reactive({ #This only works if input is in ascending order of adjusted p value. - temp <- sapply(d$CGS[[res()]],function(X) X[heatGenes(),"MTC"]) - rownames(temp) <- heatGenes() - return(t(temp)) - }) - - hC <- reactive(hclust(dist(clustMeans()),"single")) - hG <- reactive(hclust(dist(t(clustMeans())),"complete")) - - sepClust <- reactive({ - if (hiC() == "") { - return(c(NA,NA)) - } else { - return(nrow(clustMeans()) - - c(which(levels(clusts())[hC()$order] == hiC()) - 1, - which(levels(clusts())[hC()$order] == hiC()))) - } - }) - - plot_heatmap <- function() { - if (length(levels(clusts())) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Heatmap cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - tempLabRow <- paste(paste0("Cluster ",levels(clusts())), - paste(sapply(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": ") - heatmap.2(clustMeans(),Rowv=as.dendrogram(hC()),Colv=as.dendrogram(hG()),scale="column", - margins=c(9,12),lhei=c(2,10),lwid=c(1,11),trace="none", - keysize=1.5,density.info="none",key.par=list(mar=c(3,.5,2,.5),mgp=2:0), - cexCol=1 + 1/log2(nrow(clustMeans())),cexRow=1 + 1/log2(ncol(clustMeans())), - RowSideColors=clustCols(),labRow=tempLabRow,rowsep=sepClust(),col=viridis(100,d=-1)) - } - } - - output$heatmap <- renderPlot({ - if (length(res()) > 0) { - print(plot_heatmap()) - } - }) - - output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_heatmap()) - dev.off() - } - ) - - output$deGeneSave <- downloadHandler( - filename=function() { paste0(input$heatG,"_",input$DEclustNum,".txt") }, - content=function(file) { - outTable <- switch(input$heatG, - deTissue=d$deTissue[[res()]][[input$DEclustNum]], - deMarker=d$deMarker[[res()]][[input$DEclustNum]], - deNeighb=deNeighb[[res()]][[input$DEclustNum]]) - write.table(outTable,file,quote=F,sep="\t",row.names=T,col.names=NA) - } - ) - - - #### clusterGenes #### - output$genePlotClustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("genePlotClust","Cluster:",selected=cSelected(), - choices=c("",levels(clusts())[!levels(clusts()) == "Unselected"])) - } - }) - - cellMarkCols <- reactive(rainbow2(length(cellMarkers))) - - GOI <- eventReactive(input$GOIgo,grep(input$GOI,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - - plot_clusterGenes <- function() { - doubleDot <- function(col1,col2) { - upper.half.circle <- function(col1){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border=NA) - } - lower.half.circle <- function(col2){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0-cos(rs) - yc <- 0-sin(rs) - polygon(xc,yc,col=col2,border=NA) - } - upper.half.circle(col1) - lower.half.circle(col2) - rs <- seq(0,2*pi,len=200) - polygon(cos(rs),sin(rs),border="white") - } - singleDot <- function(col1){ - rs <- seq(0,2*pi,len=200) - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border="white") - } - par(mar=c(3,3,3,20),mgp=2:0) - if (hiC() == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above", - "to see gene expression for that cluster.",sep="\n")) - } else { - plot(MDTC~DR, - data=d$CGS[[res()]][[hiC()]][ - !((d$CGS[[res()]][[hiC()]]$cMu | d$CGS[[res()]][[hiC()]]$cMs) & - d$CGS[[res()]][[hiC()]]$overCut),], - col=alpha("black",0.3), - xlab="Proportion of cells detecting gene", - ylab="Mean normalized gene expression of detected genes") - title(paste0("Cluster ", hiC(),": ",d$clusterID[[res()]][hiC()]),cex=1.2) - mtext(paste("Cells:",sum(clusts()==hiC()), - " Genes detected:",length(d$CGS[[res()]][[hiC()]]$DR)),side=3,line=0,cex=0.9) - box(col=clustCols()[hiC()],lwd=2) - - if (input$cgLegend == "markers") { - for (x in which(d$CGS[[res()]][[hiC()]]$cMu)) { - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=singleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs)) { - temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))),"&")) - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=doubleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[as.integer(temp[1])], - col2=cellMarkCols()[as.integer(temp[2])])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMu & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))]) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[as.integer(temp[2])]) - } - legend(x=1.05,y=max(d$CGS[[res()]][[hiC()]]$MDTC),xpd=NA,bty="n",ncol=1, - pch=19,col=cellMarkCols(),legend=names(cellMarkersU)) - - } else if (input$cgLegend == "heatmap") { - degl <- rownames(d$CGS[[res()]][[hiC()]]) %in% - rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[hiC()]])[1:input$DEgeneCount] - if (any(degl)) { - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - - } else if (input$cgLegend == "regex" & length(GOI()) > 0) { - degl <- which(rownames(nge) %in% GOI()) - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - } - } - - output$clusterGenes <- renderPlot({ - if (length(res()) > 0) { - print(plot_clusterGenes()) - } - }) - - output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_clusterGenes()) - dev.off() - } - ) - - #### Gene Stats Plot #### - cgGeneOpts <- reactive({ - t <- nearPoints(d$CGS[[res()]][[hiC()]],input$cgClick,xvar="DR",yvar="MDTC") - return(t$genes) - }) - - output$cgSelect <- renderUI({ - if (length(res()) > 0) { - if (input$boxplotGene == "click") { - selectInput("cgGene",label="Gene:",choices=sort(cgGeneOpts())) - } else if (input$boxplotGene == "regex") { - selectInput("cgGene",label="Gene:",choices=sort(GOI())) - } - } - }) - - plot_geneTest <- function() { - if (input$cgGene == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Select a gene by either clicking on the plot above", - "or entering regular expression capturing your gene symbol of interest", - "then pick the gene from the list just above this figure", - "to see a comparison of that gene's expression across all clusters.",sep="\n")) - } else { - temp_pos <- switch(as.character(length(levels(clusts())) > 1),"TRUE"=hC()$order,"FALSE"=1) - layout(matrix(2:1,nrow=2),heights=c(1,4)) - par(mar=c(3,3,0,3),mgp=2:0) - suppressWarnings(boxplot(vector("list",length(levels(clusts()))), - ylim=range(nge[input$cgGene,]), - ylab=paste(input$cgGene,"gene expression (log2)"), - xlab=NA,xaxt="n")) - mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) - mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(get(egDB),keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), - side=1,line=2,font=2) - } - if ("sct" %in% input$bxpOpts) { - bxpCol <- alpha(clustCols(),.2) - } else { - bxpCol <- alpha(clustCols(),.8) - } - for (i in temp_pos) { - boxplot(nge[input$cgGene,clusts() == levels(clusts())[i]], - col=bxpCol[i],at=which(temp_pos == i),add=T,notch=T,outline=F) - if ("sct" %in% input$bxpOpts) { - points(jitter(rep(which(temp_pos == i),sum(clusts() == levels(clusts())[i])),amount=.2), - nge[input$cgGene,clusts() == levels(clusts())[i]],pch=20,col=alpha(clustCols()[i],.4)) - } - } - if ("rnk" %in% input$bxpOpts) { - points(x=seq_along(d$CGS[[res()]]), - y=sapply(d$CGS[[res()]][temp_pos],function(X) X[input$cgGene,"MTCrank"]) * - max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - pch=25,cex=1.2,col="darkred",bg="firebrick2") - axis(side=4,at=seq(0,1,.25) * max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - labels=percent(seq(0,1,.25)),col.ticks="darkred",col.axis="darkred") - mtext(side=4,line=2,text="Quantile of gene expression per cluster",col="darkred") - } - if (length(temp_pos) > 1) { - par(new=F,mar=c(0,3,1,3)) - plot(as.dendrogram(hC()),leaflab="none") - } - } - } - - output$geneTest <- renderPlot({ - if (length(res()) > 0) { - print(plot_geneTest()) - } - }) - - output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_geneTest()) - dev.off() - } - ) - - - ######## Distribution of genes of interest ######### - - GOI1 <- eventReactive(input$GOI1go, - grep(input$GOI1,rownames(nge),value=T,ignore.case=T), - ignoreNULL=F) - output$GOI1select <- renderUI({ - selectInput("goi1",label="Gene:",choices=sort(GOI1()),multiple=T) - }) - - GOI2 <- eventReactive(input$GOI2go, - grep(input$GOI2,rownames(nge),value=T,ignore.case=T), - ignoreNULL=F) - output$GOI2select <- renderUI({ - selectInput("goi2",label="Gene:",choices=sort(GOI2()),multiple=T) - }) - - plot_goi <- function(goi) { - if (length(goi) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("To search for your gene(s) of interest type a", - "search term (regex allowed) in the box above", - "then select the gene(s) from the drop-down list", - "in the \"Gene:\" box above right.",sep="\n")) - } else { - if (length(goi) > 5) { goiL <- 5 } else { goiL <- length(goi) } - if (goiL > 1) { - gv <- apply(nge[goi,],2,max) - } else { - gv <- nge[goi,] - } - cv <- cut(gv,breaks=100,labels=F) - par(mar=c(3,3,goiL+1,1),mgp=2:0) - plot(dr_viz,pch=21,cex=1.3,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(100,.7,d=-1)[cv],bg=viridis(100,.3,d=-1)[cv]) - temp_yrange <- max(dr_viz[,2]) - min(dr_viz[,2]) - segments(x0=seq(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.95),length.out=1000), - y0=max(dr_viz[,2]) + temp_yrange * .045, - y1=max(dr_viz[,2]) + temp_yrange * .065, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.75), - quantile(range(dr_viz[,1]),.95)), - y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), - labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- - select(get(egDB),keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - if (length(tempGeneName) > 4) { - tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] - } - title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) - } - } - } - - output$goiPlot1 <- renderPlot({ - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - output$goiPlot2 <- renderPlot({ - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - - ######## Custom sets for DE ######### - selectedSets <- reactiveValues(a=NULL,b=NULL) - - plot_tsne_selDE <- function() { - par(mar=c(3,3,1,1),mgp=2:0) - plot(dr_viz) - points(dr_viz[selectedSets$a,],pch=19,col=brewer.pal(3,"PRGn")[1]) - points(dr_viz[selectedSets$b,],pch=19,col=brewer.pal(3,"PRGn")[3]) - points(dr_viz[intersect(selectedSets$a,selectedSets$b),],pch=19,col="red") - } - output$tsneSelDE <- renderPlot({ print(plot_tsne_selDE()) }) - - - currSel <- reactive(rownames(brushedPoints(as.data.frame(dr_viz), - input$tsneBrush,xvar="tSNE_1",yvar="tSNE_2"))) - observeEvent(input$addCellsA,{ - selectedSets$a <- append(selectedSets$a,currSel()[!currSel() %in% selectedSets$a]) - }) - observeEvent(input$removeCellsA,{ - selectedSets$a <- selectedSets$a[!selectedSets$a %in% currSel()] - }) - observeEvent(input$addCellsB,{ - selectedSets$b <- append(selectedSets$b,currSel()[!currSel() %in% selectedSets$b]) - }) - observeEvent(input$removeCellsB,{ - selectedSets$b <- selectedSets$b[!selectedSets$b %in% currSel()] - }) - - observeEvent(input$calcDE,{ - newRes <- paste0("Comp.",gsub("[^A-Za-z0-9]","",input$DEsetName)) - if (length(intersect(selectedSets$a,selectedSets$b)) > 0) { - output$calcText <- renderText("Sets can't overlap (please assign red cells to only one set).") - } else if (any(sapply(list(selectedSets$a,selectedSets$b),length) < 3)) { - output$calcText <- renderText("Each set must contain at least 3 cells.") - } else if (nchar(newRes) < 1) { - output$calcText <- renderText("Please name this comparison (in text box above).") - } else if (newRes %in% colnames(d$cl)) { - output$calcText <- renderText("This comparison name has already been used.") - } else { - output$calcText <- renderText("") - withProgress({ - temp <- rep("Unselected",nrow(d$cl)) - names(temp) <- rownames(d$cl) - temp[selectedSets$a] <- "Set A" - temp[selectedSets$b] <- "Set B" - d$cl[[newRes]] <- factor(temp) - - #### Gene stats per set #### - incProgress(amount=1/6,detail="Gene detection rate per set") - setCells <- d$cl[,newRes] != "Unselected" - DR <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) sum(Y>0)/length(Y))) - - incProgress(amount=1/6,detail="Mean detected gene expression per set") - MDTC <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - - incProgress(amount=1/6,detail="Mean gene expression per set") - MTC <- apply(nge,1,function(X) - tapply(X,d$cl[,newRes],mean.logX)) - - d$CGS[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - d$CGS[[newRes]][["Unselected"]] <- data.frame(MTC=MTC["Unselected",]) - for (i in names(d$CGS[[newRes]])) { - d$CGS[[newRes]][[i]]$MTCrank <- rank(d$CGS[[newRes]][[i]]$MTC, - ties.method="min")/nrow(d$CGS[[newRes]][[i]]) - if (i == "Unselected") { next } - d$CGS[[newRes]][[i]]$cMu <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersU) - d$CGS[[newRes]][[i]]$cMs <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersS) - d$CGS[[newRes]][[i]]$overCut <- d$CGS[[newRes]][[i]]$MTC > mean(d$CGS[[newRes]][[i]]$MTC) - d$CGS[[newRes]][[i]]$genes <- rownames(d$CGS[[newRes]][[i]]) - } - if (length(cellMarkers) < 1) { - d$clusterID[[newRes]] <- rep("",nrow(cl)) - } else { - d$clusterID[[newRes]] <- c(names(cellMarkers)[sapply(d$CGS[[newRes]],function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))], - "Unselected") - names(d$clusterID[[newRes]]) <- c(names(d$CGS[[newRes]]),"Unselected") - } - - #### deTissue - DE per cluster vs all other data #### - incProgress(amount=1/6,detail="DE vs tissue logFC calculations") - deT_logFC <- sapply(levels(d$cl[,newRes])[1:2],function(i) - MTC[i,] - apply(nge[,d$cl[,newRes] != i],1,mean.logX)) - deT_genesUsed <- apply(deT_logFC,2,function(X) which(X > logFCthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logFCthresh should be set to less than ", - min(apply(deT_logFC,2,function(X) max(abs(X)))), - ", the largest magnitude logFC between cluster ", - names(which.min(apply(deT_logFC,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - incProgress(amount=1/6,detail="DE vs tissue Wilcoxon rank sum calculations") - deT_pVal <- sapply(levels(d$cl[,newRes])[1:2],function(i) - apply(nge[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[d$cl[,newRes] == i],X[d$cl[,newRes] != i])$p.value),simplify=F) - d$deTissue[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(i) - data.frame(logFC=deT_logFC[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply( - p.adjust(do.call(rbind,d$deTissue[[newRes]])$pVal,"fdr"), - rep(names(sapply(d$deTissue[[newRes]],nrow)),sapply(d$deTissue[[newRes]],nrow)), - c) - for (i in names(d$deTissue[[newRes]])) { - d$deTissue[[newRes]][[i]] <- d$deTissue[[newRes]][[i]][tempQval[[i]] <= WRSTalpha,] - d$deTissue[[newRes]][[i]]$qVal <- tempQval[[i]][tempQval[[i]] <= WRSTalpha] - } - - #### deMarker - DE per cluster vs each other cluster #### - incProgress(amount=1/6,detail="Calculating Set A vs Set B") - - deM_dDR <- DR["Set A",] - DR["Set B",] - deM_logFC <- MTC["Set A",] - MTC["Set B",] - deM_genesUsed <- switch(threshType, - dDR=which(abs(deM_dDR) > dDRthresh), - logFC=which(abs(deM_logFC) > logFCthresh)) - if (length(deM_genesUsed) < 1) { - stop("Gene filtering threshold is set too high.") - } - - deM_pVal <- apply(nge[deM_genesUsed,],1,function(X) - wilcox.test(X[d$cl[,newRes] == "Set A"], - X[d$cl[,newRes] == "Set B"])$p.value) - - temp_deVS <- data.frame(dDR=deM_dDR[deM_genesUsed], - logFC=deM_logFC[deM_genesUsed], - pVal=deM_pVal)[order(deM_pVal),] - temp_deVS$qVal <- p.adjust(temp_deVS$pVal,"fdr") - - d$deMarker[[newRes]] <- list( - "Set A"=temp_deVS[temp_deVS[,threshType] > 0 & temp_deVS$qVal <= WRSTalpha,], - "Set B"=temp_deVS[temp_deVS[,threshType] < 0 & temp_deVS$qVal <= WRSTalpha,] - ) - d$deMarker[[newRes]][["Set B"]]$dDR <- d$deMarker[[newRes]][["Set B"]]$dDR * -1 - d$deMarker[[newRes]][["Set B"]]$logFC <- d$deMarker[[newRes]][["Set B"]]$logFC * -1 - - - selectedSets$a <- selectedSets$b <- NULL - },message="DE calculations:") - - - } - }) - -} - - -########## ShinyApp ########## -shinyApp(ui = ui, server = server) \ No newline at end of file diff --git a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_deVS.RData b/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_deVS.RData deleted file mode 100644 index 2a77b66..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_deVS.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_forViz.RData b/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_forViz.RData deleted file mode 100644 index c4cadf6..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_forViz.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_savedRes.RData b/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_savedRes.RData deleted file mode 100644 index e0bbebd..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e11/e11_Cortical_Only_savedRes.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e11/intro.md b/ToBeConvertedToPkg/meCortex/e11/intro.md deleted file mode 100644 index 2602d0d..0000000 --- a/ToBeConvertedToPkg/meCortex/e11/intro.md +++ /dev/null @@ -1,4 +0,0 @@ -Welcome to the data portal for the 2017 Cell Reports paper [Developmental Emergence of Adult Neural Stem Cells as Revealed by Single-Cell Transcriptional Profiling](https://doi.org/10.1016/j.celrep.2017.12.017) by Yuzwa *et al.*, brought to you by [scClustViz](https://baderlab.github.io/scClustViz). These are embryonic day 11.5 cortically-derived cells. -Other datasets: [E13.5 Cerebral Cortex](https://innesbt.shinyapps.io/e13cortex/); [E15.5 Cerebral Cortex](https://innesbt.shinyapps.io/e15cortex/); [E17.5 Cerebral Cortex](https://innesbt.shinyapps.io/e17cortex/) - -Currently this dataset is being used to demo an updated version of scClustViz with differential expression testing between user-selected sets of cells (beta version). This is done by designating the cells per set (see panel at bottom of the page), then after calculations are complete, the comparison is stored as another cluster resolution that can be selected from the pulldown menu in the first panel. diff --git a/ToBeConvertedToPkg/meCortex/e11/rsconnect/documents/RunVizScript.R/shinyapps.io/innesbt/runvizscript.dcf b/ToBeConvertedToPkg/meCortex/e11/rsconnect/documents/RunVizScript.R/shinyapps.io/innesbt/runvizscript.dcf deleted file mode 100644 index feaaea9..0000000 --- a/ToBeConvertedToPkg/meCortex/e11/rsconnect/documents/RunVizScript.R/shinyapps.io/innesbt/runvizscript.dcf +++ /dev/null @@ -1,10 +0,0 @@ -name: runvizscript -title: -username: -account: innesbt -server: shinyapps.io -hostUrl: https://api.shinyapps.io/v1 -appId: 361580 -bundleId: 1431687 -url: https://innesbt.shinyapps.io/runvizscript/ -when: 1529078133.82426 diff --git a/ToBeConvertedToPkg/meCortex/e11/rsconnect/shinyapps.io/innesbt/e11cortex.dcf b/ToBeConvertedToPkg/meCortex/e11/rsconnect/shinyapps.io/innesbt/e11cortex.dcf deleted file mode 100644 index b21ed69..0000000 --- a/ToBeConvertedToPkg/meCortex/e11/rsconnect/shinyapps.io/innesbt/e11cortex.dcf +++ /dev/null @@ -1,10 +0,0 @@ -name: e11cortex -title: -username: -account: innesbt -server: shinyapps.io -hostUrl: https://api.shinyapps.io/v1 -appId: 365571 -bundleId: 1463887 -url: https://innesbt.shinyapps.io/e11cortex/ -when: 1530676542.41646 diff --git a/ToBeConvertedToPkg/meCortex/e13/app.R b/ToBeConvertedToPkg/meCortex/e13/app.R deleted file mode 100644 index fe32036..0000000 --- a/ToBeConvertedToPkg/meCortex/e13/app.R +++ /dev/null @@ -1,1986 +0,0 @@ -######## User-defined variables ######## - -dataPath <- "e13_Cortical_Only_forViz.RData" -## ^ Point this to the output file from PrepareInputs.R -## If you set a default resolution in the Shiny app, it will save to the same directory. - -vizScriptPath <- "./" -## ^ Point this to the directory in which the "app.R" Shiny script resides - -species <- "mouse" -## ^ Set species ("mouse"/"human"). -## If other, add the annotation database from Bioconductor to the egDB <- switch() expression below. - - -#### List known cell-type markers #### -cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna","Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b","Tle4", - "Nes","Cux1","Cux2","Tubb3","Rbfox3","Dcx")) -#cellMarkers <- list() -## ^ If you have canonical marker genes for expected cell types, list them here -## (see example above from mouse embryonic cortex). The Shiny app will attempt -## to label clusters in the tSNE projection by highest median gene expression. -## Otherwise leave the list blank (uncomment line above). - - -#### Variables for differential expression analysis #### -exponent <- 2 -## ^ log base of your normalized input data. -## Seurat defaults to natural log (set this to exp(1)), -## other methods are generally log2 (set this to 2). -pseudocount <- 1 -## ^ pseudocount added to all log-normalized values in your input data. -## Most methods use a pseudocount of 1 to eliminate log(0) errors. - -#threshType <- "logGER" # use a fold-change-based threshold for filtering genes prior to DE testing -threshType <- "dDR" # use a difference in detection rate threshold for filtering -## Filtering genes for use in differential expression testing can be done multiple ways. -## We use a fold-change filter for comparing each cluster to the tissue as a whole, but find that -## difference in detection rates works better when comparing clusters to each other. You can set -## threshType to "logGER" to use fold-change for all gene filtering if you'd prefer. - -logGERthresh <- 1 # magnitude of mean log-expression fold change between clusters to use as filter. -dDRthresh <- 0.15 # magnitude of detection rate difference between clusters to use as filter. -WRSTalpha <- 0.01 # significance level for DE testing using Wilcoxon rank sum test - - -######################################## - - - -######## Code to run the Shiny app ######## -library(markdown) -library(shiny) -library(cluster) -library(gplots) -library(scales) -library(viridis) -library(RColorBrewer) -library(TeachingDemos) - -library(org.Mm.eg.db) -egDB <- "org.Mm.eg.db" - -mean.logX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -if (length(cellMarkers) < 1) { - cellMarkersS <- cellMarkersU <- list() -} else { - cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2, - function(X) do.call(intersect,unname(cellMarkers[X]))) - try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2, - function(X) paste(X,collapse="&")),silent=T) - cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] - cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) -} - -demoRegex <- switch(species,mouse="^Actb$",human="^ACTB$") - - -load(dataPath) -temp_dataPath <- strsplit(dataPath,"/|\\\\") -dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",dataPath) -if (dataPath == "") { dataPath <- "./" } -dataTitle <- sub("\\..+$|_forViz\\..+$","",temp_dataPath[[1]][length(temp_dataPath[[1]])]) -rm(temp_dataPath) - -for (selDEfile in grep(paste0("^",dataTitle,".+selDE.+RData$"),list.files(dataPath),value=T)) { - temp <- load(paste0(dataPath,selDEfile)) - cl <- cbind(cl,new_cl) - CGS <- append(CGS,new_CGS) - deTissue <- append(deTissue,new_deTissue) - deMarker <- append(deMarker,new_deMarker) - rm(list=temp) -} - -if (file.exists(paste0(dataPath,dataTitle,"_savedRes.RData"))) { - load(paste0(dataPath,dataTitle,"_savedRes.RData")) -} else { - savedRes <- NULL -} - -if (!file.exists(paste0(dataPath,"intro.md"))) { - write(paste0(dataTitle,": You can add to this preamble by editting ",dataPath,"intro.md"), - file=paste0(dataPath,"intro.md")) -} - -silDist <- dist(dr_clust) -## ^ precalculating distances in reduced dimensionality space for the silhouette plot. - -for (l in names(CGS)) { - for (i in names(CGS[[l]])) { - CGS[[l]][[i]]$MTCrank <- rank(CGS[[l]][[i]]$MTC,ties.method="min")/nrow(CGS[[l]][[i]]) - if (i == "Unselected") { next } - CGS[[l]][[i]]$cMu <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersU) - CGS[[l]][[i]]$cMs <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersS) - CGS[[l]][[i]]$overCut <- CGS[[l]][[i]]$MTC > mean(CGS[[l]][[i]]$MTC) - CGS[[l]][[i]]$genes <- rownames(CGS[[l]][[i]]) - } -} - -if (length(cellMarkers) < 1) { - clusterID <- sapply(names(CGS),function(X) sapply(CGS[[X]],function(Z) return("")),simplify=F) -} else if (!any(unlist(cellMarkers) %in% rownames(nge))) { - warning(paste("None of the provided cellMarkers are found in the data", - "(check your gene IDs against rownames in your data).")) - clusterID <- sapply(names(CGS),function(X) sapply(CGS[[X]],function(Z) return("")),simplify=F) -} else { - clusterID <- sapply(CGS,function(Z) { - temp <- names(cellMarkers)[sapply(Z,function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))] - names(temp) <- names(Z) - temp[names(temp) == "Unselected"] <- "Unselected" - return(temp) - },simplify=F) -} - -#### Run the Shiny App! #### - - -########## UI ########## -ui <- fixedPage( - fixedRow( - titlePanel(paste("scClustViz -",dataTitle)), - includeMarkdown(paste0(dataPath,"intro.md")) - ), - hr(), - - ######## Clustering Solution Selection ######## - fixedRow( - titlePanel("Clustering Solution Selection"), - p(paste("Here you can compare the results of clustering at different resolutions to", - "determine the appropriate clustering solution for your data. You can see the", - "cluster solutions represented as boxplots on the left, where each boxplot", - "represents the number of genes differentially expressed between each cluster", - "and its nearest neighbour, or marker genes per cluster. The cluster selected", - "in the pulldown menu is highlighted in red, and the silhouette plot for that", - "cluster is shown on the right.")), - p(paste("A silhouette plot is a horizontal barplot where each bar is a cell, grouped by", - "cluster. The width of each bar represents the difference between mean distance", - "to other cells within the cluster and mean distance to cells in the nearest", - "neighbouring cluster. Distance is Euclidean in reduced dimensional space.", - "Positive silhouettes indicate good cluster cohesion.")), - p(paste("Once you've selected an appropriate cluster solution (we suggest picking one", - "where all nearest neighbouring clusters have differentially expressed genes", - "between them), click 'View clusters at this resolution' to proceed. If you", - "want to save this cluster solution as the default for next time, click 'Save", - "this resolution as default'. All figures can be downloaded as PDFs by clicking", - "the buttons next to each figure.")), - h1() - ), - fixedRow( - column(6, - fixedRow(column(6,uiOutput("resSelect"),align="left"), - column(6,align="right", - actionButton("go","View clusters at this resolution",icon("play"), - style="color: #fff; background-color: #008000"), - uiOutput("saveButton") - ) - ), - radioButtons("deType",NULL,list("# of DE genes to nearest neighbouring cluster"="deNeighb", - "# of marker genes per cluster"="deMarker"),inline=T), - plotOutput("cqPlot",height="500px")), - column(6,plotOutput("sil",height="600px")) - ), - fixedRow( - column(6,downloadButton("cqPlotSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") - ), - hr(), - - ######## Dataset and Cluster Metadata Inspection ######## - fixedRow( - titlePanel("Dataset and Cluster Metadata Inspection"), - p(paste("Here you can explore your dataset as a whole: cluster assignments for all", - "cells; metadata overlays for cell projections; and figures for comparing", - "both numeric and categorical metadata. The top two figures show cells", - "projected into 2D space, where proximity indicates transcriptional similarity.", - "On the left you can see cluster assignments and the nearest neighbours used in", - "the differential expression calculations. If cell type marker genes were", - "provided in RunVizScript.R, it will also show predicted cell type annotations.", - "On the right you can add a metadata overlay to the cell projection. Below", - "you can view relationships in the metadata as a scatterplot or compare clusterwise", - "distributions of metadata as bar- or box-plots. If you select a cluster of interest", - "(by clicking on a cell in the top-left plot, or from the list two sections down)", - "it will be highlighted for comparison in these figures.")), - strong(paste("You can select any cluster for further assessment by clicking on a cell", - "from that cluster in the top-left figure.")), - h1() - ), - fixedRow( - column(6, - if (length(cellMarkers) > 0 & !all(unlist(clusterID) == "")) { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn", - "Cluster annotations"="ca", - "Cluster annotations (label all)"="can")) - } else { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn")) - }, - checkboxInput("nnArrow",value=F,width="100%", - label="Show nearest neighbouring clusters by # of DE genes.") - ), - - column(4,selectInput("tsneMDcol",label="Metadata:",width="100%",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(2,uiOutput("tsneMDlog")) - ), - fixedRow( - column(6,plotOutput("tsne",height="580px",click="tsneClick")), - column(6,plotOutput("tsneMD",height="580px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("tsneMDSave","Save as PDF")) - ), - hr(), - - fixedRow( - column(2,selectInput("mdScatterX","X axis:",choices=colnames(md),selected="total_counts")), - column(2,selectInput("mdScatterY","Y axis:",choices=colnames(md),selected="total_features")), - column(2,uiOutput("scatterLog")), - - column(3,selectInput("mdFactorData","Metadata:",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(3,uiOutput("mdFactorOpts")) - ), - fixedRow( - column(6,plotOutput("mdScatter",height="560px")), - column(6,plotOutput("mdFactor",height="560px")) - ), - fixedRow( - column(6,align="left",downloadButton("mdScatterSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) - ), - hr(), - - ######## Differentially Expressed Genes per Cluster ######### - fixedRow( - titlePanel("Differentially Expressed Genes per Cluster"), - p(HTML(paste("Here you can explore the significantly differentially expressed genes per", - "cluster. 'DE vs Rest' refers to positively differentially expressed genes", - "when comparing a cluster to the rest of the cells as a whole. 'Marker genes'", - "refers to genes positively differentially expressed versus all other clusters", - "in a series of pairwise tests. 'DE vs neighbour' refers to genes positively", - "differentially expressed versus the nearest neighbouring cluster, as measured", - "by number of differentially expressed genes between clusters. In all cases,", - "Wilcoxon rank-sum tests are used, with a",percent(WRSTalpha),"false detection", - "rate threshold."))), - p(paste("The heatmap is generated using the differentially expressed genes from the test", - "and number of genes selected below. Differentially expressed gene lists can be", - "downloaded as tab-separated text files by selecting the test type and cluster,", - "and clicking 'Download gene list'. Genes used in the heatmap can be viewed in", - "the gene expression plots below as well.")), - h1() - - ), - - fixedRow( - column(2,uiOutput("heatDEtype")), - column(6,uiOutput("DEgeneSlider")), - column(2,uiOutput("DEclustSelect")), - column(2,downloadButton("deGeneSave","Download gene list"), - downloadButton("heatmapSave","Save as PDF"),align="right") - ), - fixedRow(plotOutput("heatmap",height="640px")), - hr(), - - ######### Gene Expression Distributions per Cluster ######### - fixedRow( - titlePanel("Gene Expression Distributions per Cluster"), - p(paste("Here you can investigate the expression of individual genes per cluster and", - "across all clusters. The first plot shows mean expression of genes in a cluster", - "as a function of their detection rate and transcript count when detected. The", - "x-axis indicates the proportion of cells in the cluster in which each gene was", - "detected (transcript count > 0), while the y-axis shows the mean normalized", - "transcript count for each gene from the cells in the cluster in which that gene", - "was detected. You can select the cluster to view from the menu below, and genes", - "can be labelled in the figure based on the cell-type markers provided in", - "RunVizScipt.R, the differentially expressed genes from the selected cluster in", - "the above heatmap, or by searching for them in the box below the figure.")), - p(paste("Clicking on the first plot will populate the list of genes near the point clicked,", - "which can be found above the next figure. By selecting a gene from this list,", - "you can compare the expression of that gene across all clusters in the second figure.", - "This list can also be populated using the gene search feature. Plotting options", - "for the second figure include the option to overlay normalized transcript count", - "from each cell in the cluster over their respective boxplots ('Include scatterplot'),", - "and the inclusion of the percentile rank of that gene's expression per cluster as", - "small triangles on the plot using the right y-axis ('Include gene rank').")), - h1() - ), - fixedRow( - column(3,uiOutput("genePlotClustSelect")), - column(9,if (length(cellMarkers) > 0) { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Cell-type markers"="markers", - "Top DE genes (from heatmap)"="heatmap", - "Gene symbols from search box below"="search")) - } else { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Top DE genes (from heatmap)"="heatmap", - "Gene symbols from search box below"="search")) - }) - ), - fixedRow(align="right", - plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") - ), - - #### Gene expression comparison #### - fixedRow( - column(3,radioButtons("searchType",label="Search by:", - choices=c("Gene list (comma-separated)"="comma", - "Regular expression"="regex"))), - column(8,uiOutput("geneSearchBox")), - column(1,actionButton("GOIgo","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOIgo { margin-top: 25px; margin-left: -25px; }"), - fixedRow( - column(2,uiOutput("cgSelect")), - column(5,radioButtons("boxplotGene",inline=T, - label="Genes of interest (to populate list):", - choices=c("From click on plots above or below"="click", - "From gene search"="search"))), - column(5,checkboxGroupInput("bxpOpts",label="Figure options:", - selected=c("sct","rnk","notch"),inline=T, - choices=list("Include scatterplot"="sct", - "Include gene rank"="rnk", - "Show notch"="notch"))) - ), - fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") - ), - hr(), - - ######## Cluster comparison ######### - fixedRow( - titlePanel("Cluster/Set Comparison of Gene Statistics"), - p(HTML(paste("Here you can directly compare gene expression statistics between clusters.", - "Any clusters from the currently selected cluster solution can be compared,", - "and you can switch cluster solutions from the menu here for convenience.", - "The stats that can be compared are mean normalized transcript count per", - "cluster (Mean gene expression), proportion of cells in a cluster in", - "which each gene was detected (Detection rate), and mean normalized", - "transcript count in cells of the cluster in which the gene was detected", - "(Mean detected gene expression). Genes can be labelled based on", - "differential expression from the heatmap above, or using the gene search", - "feature above."))), - p(paste("The most different genes in the current comparison can also be labelled. This", - "calculation can simply be subtracting the gene stat of the x-axis cluster from", - "that of the y-axis, or distance (residual) from the line of best fit. The latter", - "calculation may be of value if there is concern that a technical factor such as", - "library size is confounding a direct comparison between clusters. In either case,", - "the resulting values can be downloaded as a ranked list where positive values are", - "higher in the cluster on the y-axis, and negative values are higher in the x-axis", - "cluster. Since this list ranks all genes in the experiment, it could be used as an", - "input for GSEA.")), - h1() - ), - fixedRow( - column(7,plotOutput("setScatter",height="640px",click="scatterClick")), - column(5, - fixedRow( - column(10,uiOutput("resSelect2")), - column(2,actionButton("go2","View",icon("play"), - style="color: #fff; background-color: #008000")) - ), - fixedRow(column(12,uiOutput("saveButton2"))), - fixedRow( - column(6,uiOutput("setScatterY")), - column(6,uiOutput("setScatterX")) - ), - fixedRow( - column(7,radioButtons("scatterInput",label="Gene stat to display:", - choices=c("Mean gene expression"="MTC", - "Detection rate"="DR", - "Mean detected gene expression"="MDTC"))), - column(5,radioButtons("scatterLine",label="Difference calculation:", - choices=c("Subtraction"="sub","From line of best fit"="lbf"))) - ), - fixedRow(column(12,radioButtons("diffLabelType",label="Label genes by:", - choices=c("Most different by calculation"="diff", - "Top DE genes (from heatmap)"="de", - "Genes symbols from search box above"="search"), - inline=T))), - fixedRow(column(12,uiOutput("diffLabelSelect"))), - fixedRow( - column(4,checkboxInput("scatterLabelAngle",label="Flip label angle",value=F)), - column(4,downloadButton("setScatterSave","Save as PDF"),align="right"), - column(4,downloadButton("setComparisonSave","Download ranked list")) - ) - ) - ),tags$style(type='text/css',paste("button#go2 { margin-top: 25px; margin-left: -25px; }", - "button#updateForViz2 { margin-top: -25px; }")), - - hr(), - - ######## Custom sets for DE ######### - fixedRow(titlePanel("Manually Select Cells for DE Testing")), - fixedRow( - column(8,plotOutput("tsneSelDE",brush="tsneBrush",height="750px")), - column(4, - p(paste("Here you can select cells to further explore using the figures above.", - "Click and drag to select cells, and use the buttons below to add them", - "to a set of cells. When your sets are ready, name the comparison and", - "click the 'Calculate differential gene expression' button. Once the", - "calculation is done the comparison will be added to the cluster list", - "at the top of the page and the current cluster solution will be updated", - "to show this comparison. The comparison can be saved by clicking 'Save", - "this comparison to disk' next to either cluster solution menu.")), - hr(), - selectInput("tsneSelDEcol","Metadata overlay:",choices=c("",colnames(md))), - hr(), - column(6,htmlOutput("textSetA"), - actionButton("addCellsA","Set A: Add Cells",icon("plus"), - style="color: #fff; background-color: #a50026"), - actionButton("removeCellsA","Set A: Remove Cells",icon("minus"), - style="color: #a50026; background-color: #fff; border-color: #a50026") - ), - column(6,htmlOutput("textSetB"), - actionButton("addCellsB","Set B: Add Cells",icon("plus"), - style="color: #fff; background-color: #313695"), - actionButton("removeCellsB","Set B: Remove Cells",icon("minus"), - style="color: #313695; background-color: #fff; border-color: #313695") - ), - htmlOutput("textOverlap"), - hr(), - textInput("DEsetName","Short name for this comparison:", - placeholder="A-z0-9 only please"), - actionButton("calcDE","Calculate differential gene expression",icon("play")), - hr(), - span(textOutput("calcText"),style="color:red") - ) - ), - hr(), - - ######## Distribution of genes of interest ######### - fixedRow( - titlePanel("Cell Distribution of Genes of Interest"), - p(paste("Here you can overlay gene expression values for individual genes of interest", - "on the cell projection. Search for your gene using the search box below,", - "then select your gene(s) of interest from the dropdown 'Select genes' menu.", - "You can select multiple genes, but note that for each cell only the gene", - "expression of the gene with the highest expression in that cell will be displayed.", - "You have the option to include the cluster labels from the first cell projection", - "figure in these plots, and to colour the clusters themselves. There are two", - "copies of this figure for ease of comparison between genes of interest.")), - h1() - ), - fixedRow( - column(2,radioButtons("searchType1",label="Search by:", - choices=c("Gene list"="comma", - "Regular expression"="regex"))), - column(3,uiOutput("geneSearchBox1")), - column(1,actionButton("GOI1go","Search",icon=icon("search"))), - - column(2,radioButtons("searchType2",label="Search by:", - choices=c("Gene list"="comma", - "Regular expression"="regex"))), - column(3,uiOutput("geneSearchBox2")), - column(1,actionButton("GOI2go","Search",icon=icon("search"))) - ),tags$style(type='text/css', paste("button#GOI1go { margin-top: 25px; margin-left: -25px; }", - "button#GOI2go { margin-top: 25px; margin-left: -25px; }")), - - fixedRow( - column(3, - radioButtons("plotClust1",inline=T,label="Plot:",selected="goi", - choices=list("Clusters"="clust","Gene expression overlay"="goi")), - checkboxInput("plotLabel1",label="Include cluster labels (style as above)",value=T) - ), - column(3,uiOutput("GOI1select")), - - column(3, - radioButtons("plotClust2",inline=T,label="Plot:",selected="goi", - choices=list("Clusters"="clust","Gene expression overlay"="goi")), - checkboxInput("plotLabel2",label="Include cluster labels (style as above)",value=T) - ), - column(3,uiOutput("GOI2select")) - ), - - fixedRow( - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")), - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")) - ), - fixedRow( - column(6,plotOutput("goiPlot1",height="600px")), - column(6,plotOutput("goiPlot2",height="600px")) - ), - fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) - ), - h1() -) - - -########## Server ########## -server <- function(input,output,session) { - d <- reactiveValues(cl=cl,CGS=CGS, - clusterID=clusterID, - deTissue=deTissue, - deMarker=deMarker) - - clustCols <- function(res) { - if (grepl("^Comp",res)) { - c(brewer.pal(3,"PRGn")[c(1,3)],"grey80") - } else if (length(levels(d$cl[,res])) <= 8) { - brewer.pal(length(levels(d$cl[,res])),"Dark2")[1:length(levels(d$cl[,input$res]))] - } else { - rainbow2(length(levels(d$cl[,res]))) - } - } - - - ######## Cluster Resolution Selection ######## - #### Inter-cluster DE boxplots #### - numClust <- sapply(cl[,!grepl("^Comp",colnames(cl))],function(X) length(levels(X))) - clustList <- reactive({ - temp <- as.list(colnames(d$cl)) - names(temp)[seq_along(numClust)] <- paste0(unlist(temp)[seq_along(numClust)], - ": ",numClust," clusters") - if (length(temp) > length(numClust)) { - names(temp)[setdiff(seq_along(temp),seq_along(numClust))] <- - paste0("Comparison: ", - sub("Comp.","",fixed=T, - x=unlist(temp)[setdiff(seq_along(temp),seq_along(numClust))])) - } - return(temp) - }) - output$resSelect <- renderUI({ - if (is.null(res())) { temp_sel <- savedRes} else { temp_sel <- res() } - selectInput("res","Resolution:",choices=clustList(),selected=temp_sel) - }) - output$saveButton <- renderUI({ - if (grepl("^Comp",input$res)) { - actionButton("updateForViz","Save this comparison to disk",icon("save")) - } else { - actionButton("save","Save this resolution as default",icon("bookmark")) - } - }) - numClust <- numClust[numClust > 1] - - plot_cqPlot <- function() { - numDEgenes <- lapply(get(input$deType)[!grepl("^Comp",names(get(input$deType)))], - function(X) sapply(X,nrow)) - toplim <- c(21,max(unlist(numDEgenes)) + 20) - botlim <- c(-1,21) - - if (grepl("^Comp",input$res)) { - par(mar=c(3,3.5,1,1)) - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Press 'View clusters at this resolution'", - "to view the comparison", - sub("Comp.","",input$res,fixed=T),sep="\n")) - } else { - par(mar=c(0.2,3.5,1,1),mgp=2:0,mfrow=c(2,1)) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=toplim,yaxs="i",xaxt="n",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - - par(mar=c(3,3.5,0.2,1),mgp=2:0) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=botlim,yaxs="i",xlab="Number of clusters",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - mtext(switch(input$deType, - "deMarker"="Positive DE genes per cluster to all other clusters", - "deNeighb"="Positive DE genes per cluster to nearest cluster") - ,side=2,line=2.5,at=botlim[2],xpd=NA) - } - } - - output$cqPlot <- renderPlot({ - print(plot_cqPlot()) - }) - - output$cqPlotSave <- downloadHandler( - filename="cqPlot.pdf", - content=function(file) { - pdf(file,width=7,height=6) - print(plot_cqPlot()) - dev.off() - } - ) - - #### Silhouette plot #### - plot_sil <- function() { - tempSil <- silhouette(as.integer(d$cl[,input$res]),dist=silDist) - par(mar=c(4.5,.5,1.5,1.5),mgp=2:0) - if (length(tempSil) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Silhouette plot cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - plot(tempSil,beside=T,border=NA,main=NA,col=clustCols(input$res),do.n.k=T) - } - } - - output$sil <- renderPlot({ - print(plot_sil()) - }) - - output$silSave <- downloadHandler( - filename="sil.pdf", - content=function(file) { - pdf(file,width=6,height=7) - print(plot_sil()) - dev.off() - } - ) - - #### Resolution selection buttons #### - res <- reactiveVal() - observeEvent(input$go,res(input$res),ignoreNULL=F) - observeEvent(input$go2,res(input$res2),ignoreNULL=F) - - observeEvent(input$save,{ - savedRes <<- input$res #<<- updates variable outside scope of function (ie. global environment) - save(savedRes,file=paste0(dataPath,dataTitle,"_savedRes.RData")) - }) - - - ######## Cell-type Clusters ######## - clusts <- reactive(d$cl[,res()]) - - #### Cell-type tSNE #### - plot_tsne_labels <- function() { - if (input$tsneLabels == "ca") { - temp_labelNames <- sapply(unique(d$clusterID[[res()]]),function(X) - names(which(d$clusterID[[res()]] == X)),simplify=F) - temp_labels <- apply(dr_viz,2,function(Y) - tapply(Y,apply(sapply(temp_labelNames,function(X) clusts() %in% X),1,which),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=names(temp_labelNames),font=2,cex=1.5) - } else if (input$tsneLabels == "can") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=d$clusterID[[res()]],font=2,cex=1.5) - } else if (input$tsneLabels == "cn") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=levels(clusts()),font=2,cex=1.5) - } else { - legend("center",legend="You changed the label choice names...") - } - } - - plot_tsne <- function() { - par(mar=c(3,3,4,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs"), - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(clustCols(res())[clusts()],0.2)[!ci()], - bg=alpha(clustCols(res())[clusts()],0.1)[!ci()]) - points(dr_viz[ci(),],pch=21, - col=alpha(clustCols(res())[clusts()],1)[ci()], - bg=alpha(clustCols(res())[clusts()],0.5)[ci()]) - } else { - points(dr_viz,pch=21, - col=alpha(clustCols(res())[clusts()],1), - bg=alpha(clustCols(res())[clusts()],0.5)) - } - if (hiC() != "") { - mtext(side=3,line=-1,text=paste("Cluster",hiC(),"-", - d$clusterID[[res()]][hiC()],"-", - sum(clusts() == hiC()),"cells")) - } - if (input$nnArrow) { - temp_nn <- sapply(deNeighb[[res()]],function(X) - unique(gsub(pattern="^vs\\.|\\.[A-Za-z]+?$","",colnames(X))),simplify=F) - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - sapply(names(temp_nn),function(X) - arrows(lwd=2,col=alpha("black",0.5),length=0.1, - x0=temp_labels[X,1],y0=temp_labels[X,2], - x1=temp_labels[temp_nn[[X]],1],y1=temp_labels[temp_nn[[X]],2])) - } - } - - output$tsne <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsne()) - print(plot_tsne_labels()) - } - }) - - output$tsneSave <- downloadHandler( - filename="tsne.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsne()) - print(plot_tsne_labels()) - dev.off() - } - ) - - #### clusterSelect #### - - - clusterSelect <- reactiveValues(cl=NULL) - - observeEvent(input$tsneClick,{ clusterSelect$cl <- input$tsneClick }) - - cSelected <- reactive({ - t <- nearPoints(as.data.frame(dr_viz),clusterSelect$cl,xvar="tSNE_1",yvar="tSNE_2",threshold=5) - t2 <- d$cl[rownames(t)[1],res()] - if (is.na(t2)) { - return("") - } else if (t2 == "Unselected") { - return("") - } else { - return(t2) - } - }) - - hiC <- reactive({ - if (length(res()) < 1) { - return("") - } else if (input$genePlotClust != "") { - d$cl[which(d$cl[,res()] == input$genePlotClust)[1],res()] - } else { - return(input$genePlotClust) - } - }) - - ci <- reactive({ - if (hiC() == "") { - rep(F,length(clusts())) - } else { - clusts() == hiC() - } - }) - - #### Metadata tSNE overlay #### - output$tsneMDlog <- renderUI({ - if (!(is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol]))) { - checkboxGroupInput("tsneMDlog",label="Colour scale", - choices=c("Log scale"="log"),width="100%") - } - }) - - plot_tsneMD <- function() { - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - id <- as.factor(md[,input$tsneMDcol]) - if (length(levels(md[,input$tsneMDcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneMDcol])), - "Dark2")[1:length(levels(md[,input$tsneMDcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneMDcol]))) - } - } else { - if ("log" %in% input$tsneMDlog) { - id <- cut(log10(md[,input$tsneMDcol]),100) - } else { - id <- cut(md[,input$tsneMDcol],100) - } - idcol <- viridis(100,d=-1) - } - layout(cbind(2:1),heights=c(1,9)) - par(mar=c(3,3,0,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(idcol,.1)[id[!ci()]], - bg=alpha(idcol,0.05)[id[!ci()]]) - points(dr_viz[ci(),],pch=21, - col=alpha(idcol,.8)[id[ci()]], - bg=alpha(idcol,0.4)[id[ci()]]) - } else { - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - } - plot_tsne_labels() - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - par(mar=c(0,0,0,0)) - plot.new() - legend("bottom",bty="n",horiz=T,pch=c(NA,rep(21,length(levels(md[,input$tsneMDcol])))), - legend=c(paste0(input$tsneMDcol,":"),levels(md[,input$tsneMDcol])), - col=c(NA,idcol),pt.bg=c(NA,alpha(idcol,0.5))) - } else { - if ("log" %in% input$tsneMDlog) { - tempMain <- paste(input$tsneMDcol,"(log scale)") - } else { - tempMain <- input$tsneMDcol - } - par(mar=c(0,5,3,3)) - barplot(rep(1,100),space=0,col=idcol,xaxt="n",yaxt="n",border=NA,main=tempMain) - text(x=c(1,100),y=1,pos=c(2,4),xpd=NA,labels=round(range(md[,input$tsneMDcol]),2)) - } - } - - output$tsneMD <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsneMD()) - } - }) - - output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_tsneMD()) - dev.off() - } - ) - - #### Metadata Scatterplot #### - output$scatterLog <- renderUI({ - if ((is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) | - (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY]))) { - checkboxGroupInput("scatterLog",inline=F,label=NULL, - choices=c("Log x axis"="x","Log y axis"="y","Show notch"="notch"), - selected="notch") - } else { - checkboxGroupInput("scatterLog",inline=F,label=NULL, - choices=c("Log x axis"="x","Log y axis"="y")) - } - }) - - plot_mdScatter <- function() { - if ((is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) & - (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY]))) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,"This figure is not designed to compare to categorical variables.") - } else if (is.factor(md[,input$mdScatterX]) | is.character(md[,input$mdScatterX])) { - par(mar=c(3,3,2,1),mgp=2:0) - if (any(ci())) { - temp1 <- tapply(md[!ci(),input$mdScatterY],as.factor(md[!ci(),input$mdScatterX]),c) - temp2 <- tapply(md[ci(),input$mdScatterY],as.factor(md[ci(),input$mdScatterX]),c) - plot(x=NULL,y=NULL,ylim=range(md[,input$mdScatterY]), - xlim=c(0,length(levels(as.factor(md[,input$mdScatterX]))) * 3), - log=sub("notch","",paste(input$scatterLog,collapse="")),xaxt="n", - xlab=input$mdScatterX,ylab=input$mdScatterY) - boxplot(temp1,add=T,xaxt="n",notch="notch" %in% input$scatterLog, - at=seq(1,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3)) - boxplot(temp2,add=T,xaxt="n",notch="notch" %in% input$scatterLog,border="red", - at=seq(2,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3)) - axis(side=1,at=seq(1.5,length(levels(as.factor(md[,input$mdScatterX]))) * 3,by=3), - labels=names(temp1)) - legend("top",bty="n",xpd=NA,inset=c(0,-.05),pch=0,col="red", - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } else { - boxplot(tapply(md[,input$mdScatterY],as.factor(md[,input$mdScatterX]),c), - xlab=input$mdScatterX,ylab=input$mdScatterY, - log=sub("notch","",paste(input$scatterLog,collapse="")), - notch="notch" %in% input$scatterLog) - } - } else if (is.factor(md[,input$mdScatterY]) | is.character(md[,input$mdScatterY])) { - par(mar=c(3,3,2,1),mgp=2:0) - if (any(ci())) { - temp1 <- tapply(md[!ci(),input$mdScatterX],as.factor(md[!ci(),input$mdScatterY]),c) - temp2 <- tapply(md[ci(),input$mdScatterX],as.factor(md[ci(),input$mdScatterY]),c) - plot(x=NULL,y=NULL,xlim=range(md[,input$mdScatterX]), - ylim=c(0,length(levels(as.factor(md[,input$mdScatterY]))) * 3), - log=sub("notch","",paste(input$scatterLog,collapse="")),yaxt="n", - xlab=input$mdScatterX,ylab=input$mdScatterY) - boxplot(temp1,add=T,horizontal=T,yaxt="n",notch="notch" %in% input$scatterLog, - at=seq(1,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3)) - boxplot(temp2,add=T,horizontal=T,yaxt="n",notch="notch" %in% input$scatterLog,border="red", - at=seq(2,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3)) - axis(side=2,at=seq(1.5,length(levels(as.factor(md[,input$mdScatterY]))) * 3,by=3), - labels=names(temp1)) - legend("top",bty="n",xpd=NA,inset=c(0,-.05),pch=0,col="red", - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } else { - boxplot(tapply(md[,input$mdScatterX],as.factor(md[,input$mdScatterY]),c), - horizontal=T,xlab=input$mdScatterX,ylab=input$mdScatterY, - log=sub("notch","",paste(input$scatterLog,collapse="")), - notch="notch" %in% input$scatterLog) - } - } else { - layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5)) - par(mar=c(3,3,0,0),mgp=2:0,cex=1.1) - plot(md[!ci(),input$mdScatterX],md[!ci(),input$mdScatterY], - log=sub("notch","",paste(input$scatterLog,collapse="")), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab=input$mdScatterX,ylab=input$mdScatterY) - points(md[ci(),input$mdScatterX],md[ci(),input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2)) - if (any(ci())) { - legend("topleft",bty="n",pch=21,col="red",pt.bg=alpha("red",0.5), - legend=paste("Cluster",hiC(),"-",d$clusterID[[res()]][hiC()])) - } - if ("x" %in% input$scatterLog) { tempLX <- "x" } else { tempLX <- "" } - if ("y" %in% input$scatterLog) { tempLY <- "y" } else { tempLY <- "" } - par(mar=c(0,3,1,0)) - boxplot(tapply(md[,input$mdScatterX],ci(),c),log=tempLX, - horizontal=T,xaxt="n",yaxt="n",border=c("black","red")) - par(mar=c(3,0,0,1)) - boxplot(tapply(md[,input$mdScatterY],ci(),c),log=tempLY, - horizontal=F,xaxt="n",yaxt="n",border=c("black","red")) - } - } - - output$mdScatter <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdScatter()) - } - }) - - output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdScatter()) - dev.off() - } - ) - - #### Metadata Factor Barplot #### - output$mdFactorOpts <- renderUI({ - if (is.factor(md[,input$mdFactorData]) | is.character(md[,input$mdFactorData])) { - radioButtons("mdFactorRA","Factor counts per cluster:",inline=T, - choices=list("Absolute"="absolute","Relative"="relative")) - } else { - checkboxGroupInput("mdFactorOpts",inline=T,label="Figure options", - choices=c("Log scale"="y","Show notch"="notch"),selected="notch") - } - }) - - plot_mdFactor <- function() { - if (is.factor(md[,input$mdFactorData]) | is.character(md[,input$mdFactorData])) { - id <- switch(input$mdFactorRA, - "relative"=tapply(md[,input$mdFactorData],clusts(), - function(X) table(X) / length(X)), - "absolute"=tapply(md[,input$mdFactorData],clusts(),table)) - if (is.list(id)) { id <- do.call(cbind,id) } - idylab <- switch(input$mdFactorRA, - "relative"="Proportion of cells per cluster", - "absolute"="Number of cells per cluster") - if (length(levels(md[,input$mdFactorData])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$mdFactorData])), - "Dark2")[1:length(levels(md[,input$mdFactorData]))] - } else { - idcol <- rainbow2(length(levels(md[,input$mdFactorData]))) - } - par(mar=c(3,3,2,1),mgp=2:0) - barplot(id,col=idcol,ylab=idylab, - legend.text=levels(md[,input$mdFactorData]), - args.legend=list(x="topright",horiz=T,inset=c(0,-.08),bty="n")) - mtext(input$mdFactorData,side=3,adj=0,font=2,line=1,cex=1.2) - } else { - par(mar=c(3,3,2,1),mgp=2:0) - boxplot(tapply(md[,input$mdFactorData],cl[,res()],c), - ylab=input$mdFactorData,notch="notch" %in% input$mdFactorOpts, - log=sub("notch","",paste(input$mdFactorOpts,collapse="")), - border=clustCols(res()),col=alpha(clustCols(res()),0.3)) - } - } - - output$mdFactor <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdFactor()) - } - }) - - output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_mdFactor()) - dev.off() - } - ) - - - ######## Differentially Expressed Genes per Cluster ######### - - output$heatDEtype <- renderUI({ - if (!is.null(res())) { - if (grepl("^Comp",res())) { - temp <- list("DE vs rest"="deTissue", - "Set A vs Set B"="deMarker") - } else { - temp <- list("DE vs rest"="deTissue", - "Marker genes"="deMarker", - "DE vs neighbour"="deNeighb") - } - radioButtons("heatG","Heapmap Genes:",choices=temp,selected="deMarker") - } - }) - - output$DEgeneSlider <- renderUI({ - if (length(res()) > 0) { - if (input$heatG == "deTissue") { - sliderInput("DEgeneCount",min=1,max=max(sapply(d$deTissue[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression of cluster over tissue", - "# of genes per cluster to show",sep="
"))) - } else if (input$heatG == "deMarker") { - if (grepl("^Comp",res())) { - temp_label <- HTML(paste( - "Positive differential gene expression between sets", - "# of genes per set to show",sep="
")) - } else { - temp_label <- HTML(paste( - "Positive differential gene expression between cluster and all other clusters", - "# of genes per cluster to show",sep="
")) - } - sliderInput("DEgeneCount",min=1,max=max(sapply(d$deMarker[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=temp_label) - } else if (input$heatG == "deNeighb") { - sliderInput("DEgeneCount",min=1,max=max(sapply(deNeighb[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste( - "Positive differential gene expression between cluster and nearest neighbour", - "# of genes per cluster to show",sep="
"))) - } - } - }) - - output$DEclustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("DEclustNum","Cluster # for gene list", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } - }) - - heatGenes <- reactive({ - temp <- unique(unlist(lapply( - switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]), - function(X) - if (nrow(X) == 0) { NA } else { rownames(X)[1:input$DEgeneCount] } - ))) - temp <- temp[!is.na(temp)] - return(temp) - }) - - clustMeans <- reactive({ #This only works if input is in ascending order of adjusted p value. - temp <- sapply(d$CGS[[res()]],function(X) X[heatGenes(),"MTC"]) - rownames(temp) <- heatGenes() - return(t(temp)) - }) - - hC <- reactive({ - if (exists("deDist")) { - if (res() %in% names(deDist)) { - return(hclust(as.dist(deDist[[res()]]),"single")) - } else { - return(hclust(dist(clustMeans()),"single")) - } - } else { - return(hclust(dist(clustMeans()),"single")) - } - }) - hG <- reactive(hclust(dist(t(clustMeans())),"complete")) - - sepClust <- reactive({ - if (hiC() == "") { - return(c(NA,NA)) - } else { - return(nrow(clustMeans()) - - c(which(levels(clusts())[hC()$order] == hiC()) - 1, - which(levels(clusts())[hC()$order] == hiC()))) - } - }) - - plot_heatmap <- function() { - if (length(levels(clusts())) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Heatmap cannot be computed", - "with less than two clusters.",sep="\n")) - } else if (length(heatGenes()) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,"There are no differentially expressed genes.") - } else { - if ("Unselected" %in% levels(clusts())) { - tempLabRow <- c(paste(levels(clusts())[!levels(clusts()) == "Unselected"], - paste(sapply(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": "),"Unselected") - } else { - tempLabRow <- paste(paste0("Cluster ",levels(clusts())), - paste(sapply(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": ") - } - heatmap.2(clustMeans(),Rowv=as.dendrogram(hC()),Colv=as.dendrogram(hG()),scale="column", - margins=c(9,12),lhei=c(2,10),lwid=c(1,11),trace="none", - keysize=1.5,density.info="none",key.par=list(mar=c(3,.5,2,.5),mgp=2:0), - cexCol=1 + 1/log2(nrow(clustMeans())),cexRow=1 + 1/log2(ncol(clustMeans())), - RowSideColors=clustCols(res()),labRow=tempLabRow, - rowsep=sepClust(),col=viridis(100,d=-1)) - } - } - - output$heatmap <- renderPlot({ - if (length(res()) > 0) { - print(plot_heatmap()) - } - }) - - output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_heatmap()) - dev.off() - } - ) - - output$deGeneSave <- downloadHandler( - filename=function() { paste0(input$heatG,"_",input$DEclustNum,".txt") }, - content=function(file) { - outTable <- switch(input$heatG, - deTissue=d$deTissue[[res()]][[input$DEclustNum]], - deMarker=d$deMarker[[res()]][[input$DEclustNum]], - deNeighb=deNeighb[[res()]][[input$DEclustNum]]) - write.table(outTable,file,quote=F,sep="\t",row.names=T,col.names=NA) - } - ) - - - #### Gene search box #### - output$geneSearchBox <- renderUI({ - if (input$searchType == "comma") { - textInput("GOI",width="100%", - label=paste("Enter list of genes,", - "(comma/space-separated, case-insensitive)", - "and click Search")) - } else if (input$searchType == "regex") { - textInput("GOI",value=demoRegex,width="100%", - label="Search for genes by regular expression and click Search") - } - }) - GOI <- eventReactive(input$GOIgo,{ - if (input$searchType == "comma") { - tempGeneList <- strsplit(input$GOI,split="[\\s,]",perl=T)[[1]] - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType == "regex") { - return(grep(input$GOI,rownames(nge),value=T,ignore.case=T)) - } - }, - ignoreNULL=F) - - - #### Gene expression in cluster #### - output$genePlotClustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("genePlotClust","Cluster:",selected=cSelected(), - choices=c("",levels(clusts())[!levels(clusts()) == "Unselected"])) - } - }) - - cellMarkCols <- reactive(rainbow2(length(cellMarkers))) - - plot_clusterGenes <- function() { - doubleDot <- function(col1,col2) { - upper.half.circle <- function(col1){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border=NA) - } - lower.half.circle <- function(col2){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0-cos(rs) - yc <- 0-sin(rs) - polygon(xc,yc,col=col2,border=NA) - } - upper.half.circle(col1) - lower.half.circle(col2) - rs <- seq(0,2*pi,len=200) - polygon(cos(rs),sin(rs),border="white") - } - singleDot <- function(col1){ - rs <- seq(0,2*pi,len=200) - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border="white") - } - par(mar=c(3,3,3,20),mgp=2:0) - if (hiC() == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above", - "or select a cluster from the drop-down list above left", - "to see gene expression for that cluster.",sep="\n")) - } else { - temp_ylab <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - plot(MDTC~DR, - data=d$CGS[[res()]][[hiC()]][ - !((d$CGS[[res()]][[hiC()]]$cMu | d$CGS[[res()]][[hiC()]]$cMs) & - d$CGS[[res()]][[hiC()]]$overCut),], - col=alpha("black",0.3), - xlab="Proportion of cells in which gene was detected", - ylab=paste("Mean normalized gene expression where detected",temp_ylab)) - title(paste0("Cluster ", hiC(),": ",d$clusterID[[res()]][hiC()]),cex=1.2) - mtext(paste("Cells:",sum(clusts()==hiC()), - " Genes detected:",length(d$CGS[[res()]][[hiC()]]$DR)),side=3,line=0,cex=0.9) - box(col=clustCols(res())[hiC()],lwd=2) - - if (input$cgLegend == "markers") { - for (x in which(d$CGS[[res()]][[hiC()]]$cMu)) { - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=singleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs)) { - temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))),"&")) - my.symbols(x=d$CGS[[res()]][[hiC()]]$DR[x], - y=d$CGS[[res()]][[hiC()]]$MDTC[x], - symb=doubleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[as.integer(temp[1])], - col2=cellMarkCols()[as.integer(temp[2])])) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMu & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[which(sapply(cellMarkersU,function(X) - d$CGS[[res()]][[hiC()]]$genes[x] %in% X))]) - } - for (x in which(d$CGS[[res()]][[hiC()]]$cMs & d$CGS[[res()]][[hiC()]]$overCut)) { - text(x=d$CGS[[res()]][[hiC()]]$DR[x],y=d$CGS[[res()]][[hiC()]]$MDTC[x], - labels=d$CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[as.integer(temp[2])]) - } - legend(x=1.05,y=max(d$CGS[[res()]][[hiC()]]$MDTC),xpd=NA,bty="n",ncol=1, - pch=19,col=cellMarkCols(),legend=names(cellMarkersU)) - - } else if (input$cgLegend == "heatmap") { - degl <- rownames(d$CGS[[res()]][[hiC()]]) %in% - rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[hiC()]])[1:input$DEgeneCount] - if (any(degl)) { - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - temp_n <- nrow(switch(input$heatG, - deTissue=d$deTissue, - deMarker=d$deMarker, - deNeighb=deNeighb)[[res()]][[hiC()]]) - temp_lab <- switch(input$heatG, - deTissue=" DE genes vs rest of cells in sample", - deMarker=" marker genes", - deNeighb=" DE genes vs nearest neighbouring cluster") - legend("top",bty="n",pch=16,col="darkred", - legend=paste0(temp_n,temp_lab," (showing top ", - min(temp_n,input$DEgeneCount),")")) - } else if (input$cgLegend == "search" & length(GOI()) > 0) { - degl <- which(rownames(nge) %in% GOI()) - points(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=d$CGS[[res()]][[hiC()]]$DR[degl],y=d$CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=d$CGS[[res()]][[hiC()]]$genes[degl]) - } - } - } - - output$clusterGenes <- renderPlot({ - if (length(res()) > 0) { - print(plot_clusterGenes()) - } - }) - - output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_clusterGenes()) - dev.off() - } - ) - - #### Gene expression comparison #### - clickGenes <- reactiveVal() - observeEvent(input$cgClick,{ - t <- nearPoints(d$CGS[[res()]][[hiC()]],input$cgClick,xvar="DR",yvar="MDTC") - clickGenes(t$genes) - }) - observeEvent(input$scatterClick,{ - t <- nearPoints(compDF(),input$scatterClick,xvar="x",yvar="y") - clickGenes(t$genes) - }) - - output$cgSelect <- renderUI({ - if (length(res()) > 0) { - if (input$boxplotGene == "click") { - selectInput("cgGene",choices=sort(clickGenes()),label="Select gene from list:") - } else if (input$boxplotGene == "search") { - selectInput("cgGene",choices=sort(GOI()),label="Select gene from list:") - } - } - }) - - plot_geneTest <- function() { - if (input$cgGene == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Select a gene by either clicking on the plot above", - "or searching for genes of interest in the search bar above,", - "then pick the gene from the list just above this figure", - "to see a comparison of that gene's expression across all clusters.",sep="\n")) - } else { - temp_ylab <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - temp_pos <- switch(as.character(length(levels(clusts())) > 1),"TRUE"=hC()$order,"FALSE"=1) - layout(matrix(2:1,nrow=2),heights=c(1,4)) - par(mar=c(3,3,0,3),mgp=2:0) - suppressWarnings(boxplot(vector("list",length(levels(clusts()))), - ylim=range(nge[input$cgGene,]), - ylab=paste(input$cgGene,"normalized gene expression",temp_ylab), - xlab=NA,xaxt="n")) - mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) - mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(get(egDB),keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), - side=1,line=2,font=2) - } - if ("sct" %in% input$bxpOpts) { - bxpCol <- alpha(clustCols(res()),.2) - } else { - bxpCol <- alpha(clustCols(res()),.8) - } - for (i in temp_pos) { - boxplot(nge[input$cgGene,clusts() == levels(clusts())[i]],add=T, - at=which(temp_pos == i),notch="notch" %in% input$bxpOpts,col=bxpCol[i],outline=F) - if ("sct" %in% input$bxpOpts) { - points(jitter(rep(which(temp_pos == i),sum(clusts() == levels(clusts())[i])),amount=.2), - nge[input$cgGene,clusts() == levels(clusts())[i]],pch=20,col=alpha(clustCols(res())[i],.4)) - } - } - if ("rnk" %in% input$bxpOpts) { - points(x=seq_along(d$CGS[[res()]]), - y=sapply(d$CGS[[res()]][temp_pos],function(X) X[input$cgGene,"MTCrank"]) * - max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - pch=25,cex=1.2,col="darkred",bg="firebrick2") - axis(side=4,at=seq(0,1,.25) * max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - labels=percent(seq(0,1,.25)),col.ticks="darkred",col.axis="darkred") - mtext(side=4,line=2,text="Quantile of gene expression per cluster",col="darkred") - } - if (length(temp_pos) > 1) { - par(new=F,mar=c(0,3,1,3)) - plot(as.dendrogram(hC()),leaflab="none") - } - } - } - - output$geneTest <- renderPlot({ - if (length(res()) > 0) { - print(plot_geneTest()) - } - }) - - output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", - content=function(file) { - pdf(file,width=12,height=7) - print(plot_geneTest()) - dev.off() - } - ) - - - ######## Cluster comparison ######### - output$resSelect2 <- renderUI({ - selectInput("res2","Resolution:",choices=clustList(),selected=res(),width="100%") - }) - output$saveButton2 <- renderUI({ - if (grepl("^Comp",input$res2)) { - actionButton("updateForViz2","Save this comparison to disk",icon("save")) - } - }) - output$setScatterY <- renderUI({ - if ("Unselected" %in% levels(clusts())) { - selectInput("ssY",label="Cluster on Y-axis",selected="Set A", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } else { - selectInput("ssY",label="Cluster on Y-axis",choices=c("",levels(clusts())),selected=hiC()) - } - }) - output$setScatterX <- renderUI({ - if ("Unselected" %in% levels(clusts())) { - selectInput("ssX",label="Cluster on X-axis",selected="Set B", - choices=levels(clusts())[!levels(clusts()) == "Unselected"]) - } else { - selectInput("ssX",label="Cluster on X-axis",choices=c("",levels(clusts())), - selected=unique(gsub(pattern="^vs\\.|\\.[A-Za-z]+?$","", - colnames(deNeighb[[res()]][[hiC()]])))) - } - }) - output$diffLabelSelect <- renderUI({ - if (input$diffLabelType == "diff") { - sliderInput("diffCount",min=1,max=100,value=5,step=1,width="100%", - label="Number of genes to label") - } else if (input$diffLabelType == "de") { - if (input$heatG == "deTissue") { - sliderInput("diffCount",value=5,step=1,ticks=T,width="100%", - min=1,max=max(sapply(d$deTissue[[res()]][c(input$ssX,input$ssY)],nrow)), - label="DE vs rest: # of genes to label") - } else if (input$heatG == "deMarker") { - if (grepl("^Comp",res())) { - temp_label <- "Set A vs Set B: # of genes to label" - } else { - temp_label <- "Marker genes: # of genes to label" - } - sliderInput("diffCount", - min=1,max=max(sapply(d$deMarker[[res()]][c(input$ssX,input$ssY)],nrow)), - value=5,step=1,ticks=T,width="100%", - label=temp_label) - } else if (input$heatG == "deNeighb") { - sliderInput("diffCount",value=5,step=1,ticks=T,width="100%", - min=1,max=max(sapply(deNeighb[[res()]][c(input$ssX,input$ssY)],nrow)), - label="DE vs neighbour: # of genes to label") - } - } - }) - - compDF <- reactive({ - data.frame(x=d$CGS[[res()]][[input$ssX]][,input$scatterInput], - y=d$CGS[[res()]][[input$ssY]][,input$scatterInput], - genes=d$CGS[[res()]][[input$ssX]]$genes) - }) - - LBF <- reactive({ - lm(y~x,data=compDF()) - }) - - diffRanked <- reactive({ - if (input$scatterLine == "sub") { - temp <- d$CGS[[res()]][[input$ssY]][,input$scatterInput] - - d$CGS[[res()]][[input$ssX]][,input$scatterInput] - names(temp) <- rownames(d$CGS[[res()]][[input$ssY]]) - return(sort(temp,decreasing=T)) - } else if (input$scatterLine == "lbf") { - temp <- LBF()$residuals - names(temp) <- rownames(d$CGS[[res()]][[input$ssY]]) - return(sort(temp,decreasing=T)) - } - }) - - plot_setScatter <- function() { - if (!is.null(res())) { - if (input$ssX %in% levels(clusts()) & input$ssY %in% levels(clusts())) { - temp_exp <- switch(as.character(exponent == exp(1)), - "TRUE"="(natural log scale)", - "FALSE"=paste0("(log",exponent," scale)")) - temp_label <- switch(input$scatterInput, - "MTC"=paste("Mean normalized gene expression",temp_exp), - "MDTC"=paste("Mean normalized gene expression where detected",temp_exp), - "DR"="Proportion of cells in which gene was detected") - par(mar=c(3,3,2,1),mgp=2:0) - plot(d$CGS[[res()]][[input$ssX]][,input$scatterInput], - d$CGS[[res()]][[input$ssY]][,input$scatterInput], - xlab=paste0(input$ssX,": ",temp_label), - ylab=paste0(input$ssY,": ",temp_label), - main=paste(switch(input$scatterInput, - "MTC"="Mean gene expression", - "MDTC"="Mean detected gene expression", - "DR"="Detection rate"), - "comparison:",input$ssY,"vs.",input$ssX), - pch=20,col=alpha("black",0.3)) - lines(x=c(par("usr")[1],par("usr")[2]),y=c(par("usr")[3],par("usr")[3]), - lwd=2,col=clustCols(res())[which(levels(clusts()) == input$ssX)],xpd=NA) - lines(x=c(par("usr")[1],par("usr")[1]),y=c(par("usr")[3],par("usr")[4]), - lwd=2,col=clustCols(res())[which(levels(clusts()) == input$ssY)],xpd=NA) - if (input$scatterLabelAngle) { - temp_srt <- 315 - temp_adjX <- c(-0.15,0.5) - temp_adjY <- c(1.15,0.5) - } else { - temp_srt <- 45 - temp_adjX <- c(-0.15,0.5) - temp_adjY <- c(-0.15,0.5) - } - if (input$scatterLine == "sub") { - abline(0,1) - } else if (input$scatterLine == "lbf") { - abline(LBF()) - } - if (input$diffLabelType == "diff") { - points(d$CGS[[res()]][[input$ssX]][names(head(diffRanked(),input$diffCount)),input$scatterInput], - d$CGS[[res()]][[input$ssY]][names(head(diffRanked(),input$diffCount)),input$scatterInput], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssY)],0.8)) - text(d$CGS[[res()]][[input$ssX]][names(head(diffRanked(),input$diffCount)),input$scatterInput], - d$CGS[[res()]][[input$ssY]][names(head(diffRanked(),input$diffCount)),input$scatterInput], - labels=names(head(diffRanked(),input$diffCount)),srt=temp_srt,adj=temp_adjY, - col=clustCols(res())[which(levels(clusts()) == input$ssY)],font=2) - points(d$CGS[[res()]][[input$ssX]][names(tail(diffRanked(),input$diffCount)),input$scatterInput], - d$CGS[[res()]][[input$ssY]][names(tail(diffRanked(),input$diffCount)),input$scatterInput], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssX)],0.8)) - text(d$CGS[[res()]][[input$ssX]][names(tail(diffRanked(),input$diffCount)),input$scatterInput], - d$CGS[[res()]][[input$ssY]][names(tail(diffRanked(),input$diffCount)),input$scatterInput], - labels=names(tail(diffRanked(),input$diffCount)),srt=temp_srt,adj=temp_adjX, - col=clustCols(res())[which(levels(clusts()) == input$ssX)],font=2) - } else if (input$diffLabelType == "de") { - degX <- rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[input$ssX]])[1:input$diffCount] - if (length(degX) > 0) { - points(d$CGS[[res()]][[input$ssX]][degX,input$scatterInput], - d$CGS[[res()]][[input$ssY]][degX,input$scatterInput], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssX)],0.8)) - text(d$CGS[[res()]][[input$ssX]][degX,input$scatterInput], - d$CGS[[res()]][[input$ssY]][degX,input$scatterInput], - labels=degX,srt=temp_srt,adj=temp_adjX, - col=clustCols(res())[which(levels(clusts()) == input$ssX)],font=2) - } - degY <- rownames(switch(input$heatG, - deTissue=d$deTissue[[res()]], - deMarker=d$deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[input$ssY]])[1:input$diffCount] - if (length(degY) > 0) { - points(d$CGS[[res()]][[input$ssX]][degY,input$scatterInput], - d$CGS[[res()]][[input$ssY]][degY,input$scatterInput], - pch=16,col=alpha(clustCols(res())[which(levels(clusts()) == input$ssY)],0.8)) - text(d$CGS[[res()]][[input$ssX]][degY,input$scatterInput], - d$CGS[[res()]][[input$ssY]][degY,input$scatterInput], - labels=degY,srt=temp_srt,adj=temp_adjY, - col=clustCols(res())[which(levels(clusts()) == input$ssY)],font=2) - } - } else if (input$diffLabelType == "search" & length(GOI()) > 0) { - points(d$CGS[[res()]][[input$ssX]][GOI(),input$scatterInput], - d$CGS[[res()]][[input$ssY]][GOI(),input$scatterInput], - pch=16,col=alpha("darkred",0.8)) - text(d$CGS[[res()]][[input$ssX]][GOI(),input$scatterInput], - d$CGS[[res()]][[input$ssY]][GOI(),input$scatterInput], - labels=GOI(),srt=temp_srt,adj=temp_adjX,col="darkred",font=2) - } - } - } - } - - output$setScatter <- renderPlot(print(plot_setScatter())) - - output$setScatterSave <- downloadHandler( - filename="setScatter.pdf", - content=function(file) { - pdf(file,width=7,height=7) - print(plot_setScatter()) - dev.off() - } - ) - - output$setComparisonSave <- downloadHandler( - filename=function() { paste0(input$ssY,"vs",input$ssX,"_", - input$scatterInput,"_",input$scatterLine,".txt") }, - content=function(file) { - write.table(as.data.frame(diffRanked()),file,quote=F,sep="\t",row.names=T,col.names=F) - } - ) - - - ######## Custom sets for DE ######### - selectedSets <- reactiveValues(a=NULL,b=NULL) - - plot_tsne_selDE <- function() { - if (input$tsneSelDEcol == "") { - id <- rep(1,nrow(md)) - idcol <- "grey20" - } else if (is.factor(md[,input$tsneSelDEcol]) | is.character(md[,input$tsneSelDEcol])) { - id <- as.factor(md[,input$tsneSelDEcol]) - if (length(levels(md[,input$tsneSelDEcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneSelDEcol])), - "Dark2")[1:length(levels(md[,input$tsneSelDEcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneSelDEcol]))) - } - } else { - id <- cut(md[,input$tsneSelDEcol],100) - idcol <- viridis(100,d=-1) - } - par(mar=c(3,3,3,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - - if (input$tsneSelDEcol == "") { - } else if (is.factor(md[,input$tsneSelDEcol]) | is.character(md[,input$tsneSelDEcol])) { - legend("topleft",bty="n",horiz=T,xpd=NA,inset=c(0,-.09), - pch=21,col=idcol,pt.bg=alpha(idcol,0.5), - title=input$tsneSelDEcol,legend=levels(md[,input$tsneSelDEcol])) - } else { - legend("topleft",bty="n",horiz=T,xpd=NA,inset=c(0,-.09), - pch=21,col=viridis(3,d=-1),pt.bg=viridis(3,.5,d=-1), - title=input$tsneSelDEcol, - legend=c(round(min(md[,input$tsneSelDEcol]),2), - round((max(md[,input$tsneSelDEcol]) - - min(md[,input$tsneSelDEcol])) / 2,2), - round(max(md[,input$tsneSelDEcol]),2))) - } - - - points(dr_viz[selectedSets$a,],pch=19,col="#a50026") - points(dr_viz[selectedSets$b,],pch=19,col="#313695") - points(dr_viz[intersect(selectedSets$a,selectedSets$b),],pch=19,col="#ffffbf") - points(dr_viz[intersect(selectedSets$a,selectedSets$b),],pch=4,col="red") - - legend("topright",horiz=T,bty="n",xpd=NA,inset=c(0,-.09), - title="Selected Cells",legend=c("Set A","Set B","Both"), - pch=c(19,19,4),col=c("#a50026","#313695","red")) - } - output$tsneSelDE <- renderPlot({ print(plot_tsne_selDE()) }) - - - currSel <- reactive(rownames(brushedPoints(as.data.frame(dr_viz), - input$tsneBrush,xvar="tSNE_1",yvar="tSNE_2"))) - observeEvent(input$addCellsA,{ - selectedSets$a <- append(selectedSets$a,currSel()[!currSel() %in% selectedSets$a]) - }) - observeEvent(input$removeCellsA,{ - selectedSets$a <- selectedSets$a[!selectedSets$a %in% currSel()] - }) - observeEvent(input$addCellsB,{ - selectedSets$b <- append(selectedSets$b,currSel()[!currSel() %in% selectedSets$b]) - }) - observeEvent(input$removeCellsB,{ - selectedSets$b <- selectedSets$b[!selectedSets$b %in% currSel()] - }) - output$textSetA <- renderText(paste(length(selectedSets$a),"cells in Set A.")) - output$textSetB <- renderText(paste(length(selectedSets$b),"cells in Set B.")) - output$textOverlap <- renderText(paste(length(intersect(selectedSets$a,selectedSets$b)), - "cells in both sets.", - "Cells must be assigned to a single set prior to calculation.")) - - observeEvent(input$calcDE,{ - newRes <- paste0("Comp.",gsub("[^A-Za-z0-9]","",input$DEsetName)) - if (length(intersect(selectedSets$a,selectedSets$b)) > 0) { - output$calcText <- renderText("Sets can't overlap (please assign red cells to only one set).") - } else if (any(sapply(list(selectedSets$a,selectedSets$b),length) < 3)) { - output$calcText <- renderText("Each set must contain at least 3 cells.") - } else if (nchar(input$DEsetName) < 1) { - output$calcText <- renderText("Please name this comparison (in text box above).") - } else if (newRes %in% colnames(d$cl)) { - output$calcText <- renderText("This comparison name has already been used.") - } else { - output$calcText <- renderText("") - withProgress({ - temp <- rep("Unselected",nrow(d$cl)) - names(temp) <- rownames(d$cl) - temp[selectedSets$a] <- "Set A" - temp[selectedSets$b] <- "Set B" - d$cl[[newRes]] <- factor(temp) - - #### Gene stats per set #### - incProgress(amount=1/6,detail="Gene detection rate per set") - setCells <- d$cl[,newRes] != "Unselected" - DR <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) sum(Y>0)/length(Y))) - - incProgress(amount=1/6,detail="Mean detected gene expression per set") - MDTC <- apply(nge[,setCells],1,function(X) - tapply(X,d$cl[,newRes][setCells],function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - - incProgress(amount=1/6,detail="Mean gene expression per set") - MTC <- apply(nge,1,function(X) - tapply(X,d$cl[,newRes],mean.logX)) - - d$CGS[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - d$CGS[[newRes]][["Unselected"]] <- data.frame(MTC=MTC["Unselected",]) - for (i in names(d$CGS[[newRes]])) { - d$CGS[[newRes]][[i]]$MTCrank <- rank(d$CGS[[newRes]][[i]]$MTC, - ties.method="min")/nrow(d$CGS[[newRes]][[i]]) - if (i == "Unselected") { next } - d$CGS[[newRes]][[i]]$cMu <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersU) - d$CGS[[newRes]][[i]]$cMs <- rownames(d$CGS[[newRes]][[i]]) %in% unlist(cellMarkersS) - d$CGS[[newRes]][[i]]$overCut <- d$CGS[[newRes]][[i]]$MTC > mean(d$CGS[[newRes]][[i]]$MTC) - d$CGS[[newRes]][[i]]$genes <- rownames(d$CGS[[newRes]][[i]]) - } - if (length(cellMarkers) < 1) { - d$clusterID[[newRes]] <- sapply(d$CGS[[newRes]],function(Z) return("")) - } else if (!any(unlist(cellMarkers) %in% rownames(nge))) { - warning(paste("None of the provided cellMarkers are found in the data", - "(check your gene IDs against rownames in your data).")) - d$clusterID[[newRes]] <- sapply(d$CGS[[newRes]],function(Z) return("")) - } else { - d$clusterID[[newRes]] <- c(names(cellMarkers)[sapply(d$CGS[[newRes]][1:2],function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))], - "Unselected") - names(d$clusterID[[newRes]]) <- names(d$CGS[[newRes]]) - } - - #### deTissue - DE per cluster vs all other data #### - incProgress(amount=1/6,detail="DE vs tissue logGER calculations") - deT_logGER <- sapply(levels(d$cl[,newRes])[1:2],function(i) - MTC[i,] - apply(nge[,d$cl[,newRes] != i],1,mean.logX)) - deT_genesUsed <- apply(deT_logGER,2,function(X) which(X > logGERthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logGERthresh should be set to less than ", - min(apply(deT_logGER,2,function(X) max(abs(X)))), - ", the largest magnitude logGER between cluster ", - names(which.min(apply(deT_logGER,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - incProgress(amount=1/6,detail="DE vs tissue Wilcoxon rank sum calculations") - deT_pVal <- sapply(levels(d$cl[,newRes])[1:2],function(i) - apply(nge[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[d$cl[,newRes] == i],X[d$cl[,newRes] != i])$p.value),simplify=F) - d$deTissue[[newRes]] <- sapply(levels(d$cl[,newRes])[1:2],function(i) - data.frame(logGER=deT_logGER[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply( - p.adjust(do.call(rbind,d$deTissue[[newRes]])$pVal,"fdr"), - rep(names(sapply(d$deTissue[[newRes]],nrow)),sapply(d$deTissue[[newRes]],nrow)), - c) - for (i in names(d$deTissue[[newRes]])) { - d$deTissue[[newRes]][[i]] <- d$deTissue[[newRes]][[i]][tempQval[[i]] <= WRSTalpha,] - d$deTissue[[newRes]][[i]]$qVal <- tempQval[[i]][tempQval[[i]] <= WRSTalpha] - } - - #### deMarker - DE per cluster vs each other cluster #### - incProgress(amount=1/6,detail="Calculating Set A vs Set B") - - deM_dDR <- DR["Set A",] - DR["Set B",] - deM_logGER <- MTC["Set A",] - MTC["Set B",] - deM_genesUsed <- switch(threshType, - dDR=which(abs(deM_dDR) > dDRthresh), - logGER=which(abs(deM_logGER) > logGERthresh)) - if (length(deM_genesUsed) < 1) { - stop("Gene filtering threshold is set too high.") - } - - deM_pVal <- apply(nge[deM_genesUsed,],1,function(X) - wilcox.test(X[d$cl[,newRes] == "Set A"], - X[d$cl[,newRes] == "Set B"])$p.value) - - temp_deVS <- data.frame(dDR=deM_dDR[deM_genesUsed], - logGER=deM_logGER[deM_genesUsed], - pVal=deM_pVal)[order(deM_pVal),] - temp_deVS$qVal <- p.adjust(temp_deVS$pVal,"fdr") - - d$deMarker[[newRes]] <- list( - "Set A"=temp_deVS[temp_deVS[,threshType] > 0 & temp_deVS$qVal <= WRSTalpha,], - "Set B"=temp_deVS[temp_deVS[,threshType] < 0 & temp_deVS$qVal <= WRSTalpha,] - ) - d$deMarker[[newRes]][["Set B"]]$dDR <- d$deMarker[[newRes]][["Set B"]]$dDR * -1 - d$deMarker[[newRes]][["Set B"]]$logGER <- d$deMarker[[newRes]][["Set B"]]$logGER * -1 - - selectedSets$a <- selectedSets$b <- NULL - },message="DE calculations:") - - res(newRes) # Automatically update the view to show the calculated results. - } - }) - observeEvent(input$updateForViz, { - withProgress({ - new_cl <- d$cl[input$res] - new_CGS <- list() - for (i in names(d$CGS[[input$res]])) { - new_CGS[[input$res]][[i]] <- - d$CGS[[input$res]][[i]][colnames(d$CGS[[input$res]][[i]]) %in% c("DR","MDTC","MTC")] - } - new_deTissue <- d$deTissue[input$res] - new_deMarker <- d$deMarker[input$res] - incProgress(.5) - save(new_cl,new_CGS,new_deTissue,new_deMarker, - file=paste0(dataPath,dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData")) - },message=paste0( - "Saving ",dataTitle,"_selDE_",sub("Comp.","",input$res,fixed=T),".RData to ",dataPath)) - }) - - - ######## Distribution of genes of interest ######### - output$geneSearchBox1 <- renderUI({ - if (input$searchType1 == "comma") { - textInput("GOI1",width="100%", - label=paste("Enter list of genes")) - } else if (input$searchType1 == "regex") { - textInput("GOI1",value=demoRegex,width="100%", - label="Enter regular expression") - } - }) - - GOI1 <- eventReactive(input$GOI1go,{ - if (input$searchType1 == "comma") { - tempGeneList <- "" - try({ - tempGeneList <- strsplit(input$GOI1,split="[\\s,]",perl=T)[[1]] - },silent=T) - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType1 == "regex") { - return(grep(input$GOI1,rownames(nge),value=T,ignore.case=T)) - } - },ignoreNULL=F) - - output$GOI1select <- renderUI({ - selectInput("goi1",label="Select genes:",choices=sort(GOI1()),multiple=T) - }) - - output$geneSearchBox2 <- renderUI({ - if (input$searchType2 == "comma") { - textInput("GOI2",width="100%", - label=paste("Search by list of genes")) - } else if (input$searchType2 == "regex") { - textInput("GOI2",value=demoRegex,width="100%", - label="Search by regular expression") - } - }) - - GOI2 <- eventReactive(input$GOI2go,{ - if (input$searchType2 == "comma") { - tempGeneList <- "" - try({ - tempGeneList <- strsplit(input$GOI2,split="[\\s,]",perl=T)[[1]] - },silent=T) - return(rownames(nge)[which(toupper(rownames(nge)) %in% toupper(tempGeneList))]) - } else if (input$searchType2 == "regex") { - return(grep(input$GOI2,rownames(nge),value=T,ignore.case=T)) - } - },ignoreNULL=F) - - output$GOI2select <- renderUI({ - selectInput("goi2",label="Select genes:",choices=sort(GOI2()),multiple=T) - }) - - plot_tsneClust <- function() { - par(mar=c(3,3,4,1),mgp=2:0) - plot(dr_viz,pch=21, - col=alpha(clustCols(res())[clusts()],1), - bg=alpha(clustCols(res())[clusts()],0.5), - xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs")) - } - - plot_goi <- function(goi) { - if (length(goi) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("To search for your gene(s) of interest type a", - "list of genes or regex in the box above", - "then select the gene(s) from the drop-down list", - "in the \"Select genes:\" box above right.",sep="\n")) - } else { - if (length(goi) > 5) { goiL <- 5 } else { goiL <- length(goi) } - if (goiL > 1) { - gv <- apply(nge[goi,],2,max) - } else { - gv <- nge[goi,] - } - cv <- cut(gv,breaks=100,labels=F) - par(mar=c(3,3,goiL+1,1),mgp=2:0) - plot(dr_viz,pch=21,cex=1.3,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(100,.7,d=-1)[cv],bg=viridis(100,.3,d=-1)[cv]) - temp_yrange <- max(dr_viz[,2]) - min(dr_viz[,2]) - segments(x0=seq(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.95),length.out=1000), - y0=max(dr_viz[,2]) + temp_yrange * .045, - y1=max(dr_viz[,2]) + temp_yrange * .065, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.75), - quantile(range(dr_viz[,1]),.95)), - y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), - labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- - select(get(egDB),keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - if (length(tempGeneName) > 4) { - tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] - } - title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) - } - } - } - - output$goiPlot1 <- renderPlot({ - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - output$goiPlot2 <- renderPlot({ - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", - content=function(file) { - pdf(file,width=7,height=7) - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsneClust()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - -} -########## ShinyApp ########## -shinyApp(ui = ui, server = server) diff --git a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_deVS.RData b/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_deVS.RData deleted file mode 100644 index 5ae6fd9..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_deVS.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_forViz.RData b/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_forViz.RData deleted file mode 100644 index cbc2bff..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_forViz.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_savedRes.RData b/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_savedRes.RData deleted file mode 100644 index 3f535ee..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e13/e13_Cortical_Only_savedRes.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e13/intro.md b/ToBeConvertedToPkg/meCortex/e13/intro.md deleted file mode 100644 index 4193eb8..0000000 --- a/ToBeConvertedToPkg/meCortex/e13/intro.md +++ /dev/null @@ -1,2 +0,0 @@ -Welcome to the data portal for the 2017 Cell Reports paper [Developmental Emergence of Adult Neural Stem Cells as Revealed by Single-Cell Transcriptional Profiling](https://doi.org/10.1016/j.celrep.2017.12.017) by Yuzwa *et al.*, brought to you by [scClustViz](https://baderlab.github.io/scClustViz). These are embryonic day 13.5 cortically-derived cells. -Other datasets: [E11.5 Cerebral Cortex](https://innesbt.shinyapps.io/e11cortex/); [E15.5 Cerebral Cortex](https://innesbt.shinyapps.io/e15cortex/); [E17.5 Cerebral Cortex](https://innesbt.shinyapps.io/e17cortex/) diff --git a/ToBeConvertedToPkg/meCortex/e13/rsconnect/shinyapps.io/innesbt/e13cortex.dcf b/ToBeConvertedToPkg/meCortex/e13/rsconnect/shinyapps.io/innesbt/e13cortex.dcf deleted file mode 100644 index e77899b..0000000 --- a/ToBeConvertedToPkg/meCortex/e13/rsconnect/shinyapps.io/innesbt/e13cortex.dcf +++ /dev/null @@ -1,10 +0,0 @@ -name: e13cortex -title: -username: -account: innesbt -server: shinyapps.io -hostUrl: https://api.shinyapps.io/v1 -appId: 365575 -bundleId: 1500868 -url: https://innesbt.shinyapps.io/e13cortex/ -when: 1532568959.79838 diff --git a/ToBeConvertedToPkg/meCortex/e15/app.R b/ToBeConvertedToPkg/meCortex/e15/app.R deleted file mode 100644 index 77abaa2..0000000 --- a/ToBeConvertedToPkg/meCortex/e15/app.R +++ /dev/null @@ -1,1036 +0,0 @@ -######## User-defined variables ######## - -dataPath <- "e15_Cortical_Only_forViz.RData" -## ^ Point this to the output file from PrepareInputs.R -## If you set a default resolution in the Shiny app, it will save to the same directory. - -vizScriptPath <- "./" -## ^ Point this to the directory in which the "app.R" Shiny script resides - -species <- "mouse" -## ^ Set species ("mouse"/"human"). -## If other, add the annotation database from Bioconductor to the egDB <- switch() expression below. - -#### List known cell-type markers #### -cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna","Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b","Tle4", - "Nes","Cux1","Cux2","Tubb3","Rbfox3","Dcx"), - "Oligodendrocyte precursors"=c("Cspg4","Olig2","Pdgfra"), - "Oligodendrocytes"=c("Mbp","Mog","Plp1","Mag"), - "Astrocytes"=c("Aldh1l1","Gfap","Slc1a3","Glul"), - "Microglia"="Cx3cr1") -#cellMarkers <- list() -## ^ If you have canonical marker genes for expected cell types, list them here -## (see example above from mouse embryonic cortex). The Shiny app will attempt -## to label clusters in the tSNE projection by highest median gene expression. -## Otherwise leave the list blank (uncomment line above). - -######################################## - - - -######## Code to run the Shiny app ######## -library(markdown) -library(shiny) -library(cluster) -library(gplots) -library(scales) -library(viridis) -library(RColorBrewer) -library(TeachingDemos) - -library(org.Mm.eg.db) -egDB <- "org.Mm.eg.db" - -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -if (length(cellMarkers) < 1) { - cellMarkersS <- cellMarkersU <- list() -} else { - cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2,function(X) do.call(intersect,unname(cellMarkers[X]))) - try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2,function(X) paste(X,collapse="&")),silent=T) - cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] - cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) -} - -demoRegex <- switch(species,mouse="^Actb$",human="^ACTB$") - -load(dataPath) -temp_dataPath <- strsplit(dataPath,"/|\\\\") -dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",dataPath) -if (dataPath == "") { dataPath <- "./" } -dataTitle <- sub("\\..+$|_forViz\\..+$","",temp_dataPath[[1]][length(temp_dataPath[[1]])]) -rm(temp_dataPath) - -if (file.exists(paste0(dataPath,dataTitle,"_savedRes.RData"))) { - load(paste0(dataPath,dataTitle,"_savedRes.RData")) -} else { - savedRes <- NULL -} - -if (!file.exists(paste0(dataPath,"intro.md"))) { - write(paste0(dataTitle,": You can add to this preamble by editting ",dataPath,"intro.md"), - file=paste0(dataPath,"intro.md")) -} - -silDist <- dist(dr_clust,method="euclidean") -## ^ precalculating distances in reduced dimensionality space for the silhouette plot. - -for (l in names(CGS)) { - for (i in names(CGS[[l]])) { - CGS[[l]][[i]]$MTCrank <- rank(CGS[[l]][[i]]$MTC,ties.method="min")/nrow(CGS[[l]][[i]]) - CGS[[l]][[i]]$cMu <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersU) - CGS[[l]][[i]]$cMs <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersS) - CGS[[l]][[i]]$overCut <- CGS[[l]][[i]]$MTC > mean(CGS[[l]][[i]]$MTC) - CGS[[l]][[i]]$genes <- rownames(CGS[[l]][[i]]) - } -} - -if (length(cellMarkers) < 1) { - clusterID <- sapply(colnames(cl),function(X) rep("",nrow(cl)),simplify=F) -} else { - clusterID <- sapply(CGS,function(Z) { - temp <- names(cellMarkers)[sapply(Z,function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))] - names(temp) <- names(Z) - return(temp) - },simplify=F) -} - -#### Run the Shiny App! #### - - -########## UI ########## -ui <- fixedPage( - fixedRow( - titlePanel(paste("scClustViz -",dataTitle)), - includeMarkdown(paste0(dataPath,"intro.md")) - ), - hr(), - - ######## Cluster Resolution Selection ######## - fixedRow(titlePanel("Cluster Resolution Selection"), - column(6, - fixedRow(column(6,uiOutput("resSelect"),align="left"), - column(6,align="right", - actionButton("go","View clusters at this resolution",icon("play")), - actionButton("save","Save this resolution as default",icon("bookmark")))), - radioButtons("deType",NULL,list("# of marker genes per cluster"="deMarker", - "# of DE genes to nearest neighbouring cluster"="deNeighb"),inline=T), - plotOutput("cqPlot",height="500px")), - column(6,plotOutput("sil",height="600px")) - ), - fixedRow( - column(6,downloadButton("cqPlotSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") - ), - hr(), - - ######## Cell-type Clusters ######## - fixedRow(titlePanel("Cell-type Clusters")), - fixedRow( - column(6, - if (length(cellMarkers) > 0) { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn","Cluster annotations"="ca")) - } else { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn")) - }, - strong("Click point on plot below to select cluster")), - column(3,selectInput("mdScatterX","x axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_counts"),align="left"), - column(3,selectInput("mdScatterY","y axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_features"),align="left") - ), - fixedRow( - column(6,plotOutput("tsne",height="570px",click="tsneClick")), - column(6,plotOutput("mdScatter",height="570px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("mdScatterSave","Save as PDF")) - ), - hr(), - - fixedRow( - column(6,selectInput("tsneMDcol","Metadata:",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(3,selectInput("mdFactorData","Metadata (factor):", - choices=colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - selected=grep("phase", - colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - value=T,ignore.case=T)[1])), - column(3,radioButtons("mdFactorRA","Factor counts per cluster:",inline=T, - choices=list("Absolute"="absolute","Relative"="relative"))) - ), - fixedRow( - column(6,plotOutput("tsneMD",height="570px")), - column(6,plotOutput("mdFactor",height="570px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneMDSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) - ), - hr(), - - ######## Cluster-wise Gene Stats ######### - fixedRow(titlePanel("Cluster-wise Gene Stats")), - fixedRow( - column(2,radioButtons("heatG","Heapmap Genes:", - choices=list("DE vs tissue average"="deTissue", - "Marker genes"="deMarker", - "DE vs neighbour"="deNeighb"))), - column(2,uiOutput("DEclustSelect")), - column(2,downloadButton("deGeneSave","Download gene list"), - downloadButton("heatmapSave","Save as PDF"),align="right"), - column(6,uiOutput("DEgeneSlider")) - ), - fixedRow(plotOutput("heatmap",height="600px")), - hr(), - - fixedRow( - column(1,uiOutput("genePlotClustSelect")), - column(6,if (length(cellMarkers) > 0) { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Cell-type markers"="markers", - "Gene symbols (regex)"="regex", - "Top DE genes (from heatmap)"="heatmap")) - } else { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Gene symbols (regex)"="regex", - "Top DE genes (from heatmap)"="heatmap")) - }), - column(4,align="right",textInput("GOI","Gene symbols (regex)",demoRegex)), - column(1,actionButton("GOIgo","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOIgo { margin-top: 25px; }"), - fixedRow( - plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") - ), - hr(), - - fixedRow( - column(4,uiOutput("cgSelect")), - column(8, - radioButtons("boxplotGene",inline=T,label="Gene of interest:", - choices=c("Click from plot above"="click", - "From gene symbols (regex entry)"="regex")), - checkboxGroupInput("bxpOpts",label=NULL,selected=c("sct","rnk"),inline=T, - choices=list("Include scatterplot"="sct", - "Include gene rank"="rnk"))) - ), - fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") - ), - hr(), - - ######## Distribution of genes of interest ######### - fixedRow(titlePanel("Distribution of Genes of Interest")), - fixedRow( - column(4, - fixedRow( - radioButtons("plotClust1",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel1",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI1",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI1go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI1go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI1select")), - column(4, - fixedRow( - radioButtons("plotClust2",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel2",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI2",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI2go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI2go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI2select")) - ), - fixedRow( - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")), - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")) - ), - fixedRow( - column(6,plotOutput("goiPlot1",height="600px")), - column(6,plotOutput("goiPlot2",height="600px")) - ), - fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) - ), - h1() -) - - - -########## Server ########## -server <- function(input,output,session) { - clustCols <- reactive({ - if (length(levels(cl[,input$res])) <= 8) { - brewer.pal(length(levels(cl[,input$res])),"Dark2")[1:length(levels(cl[,input$res]))] - } else { - rainbow2(length(levels(cl[,input$res]))) - } - }) - - - ######## Cluster Resolution Selection ######## - #### Inter-cluster DE boxplots #### - numClust <- sapply(cl,function(X) length(levels(X))) - clustList <- as.list(colnames(cl)) - names(clustList) <- paste0(unlist(clustList),": ",numClust," clusters") - output$resSelect <- renderUI({ - selectInput("res","Resolution:",choices=clustList,selected=savedRes) - }) - numClust <- numClust[numClust > 1] - - plot_cqPlot <- function() { - numDEgenes <- lapply(get(input$deType),function(X) sapply(X,nrow)) - toplim <- c(21,max(unlist(numDEgenes)) + 20) - botlim <- c(-1,21) - - par(mar=c(0.2,3.5,1,1),mgp=2:0,mfrow=c(2,1)) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=toplim,yaxs="i",xaxt="n",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - - par(mar=c(3,3.5,0.2,1),mgp=2:0) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=botlim,yaxs="i",xlab="Number of clusters",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - mtext(switch(input$deType, - "deMarker"="Positive DE genes per cluster to all other clusters", - "deNeighb"="Positive DE genes per cluster to nearest cluster") - ,side=2,line=2.5,at=botlim[2],xpd=NA) - - } - - output$cqPlot <- renderPlot({ - print(plot_cqPlot()) - }) - - output$cqPlotSave <- downloadHandler( - filename="cqPlot.pdf", - content=function(file) { - pdf(file,width=6,height=5) - print(plot_cqPlot()) - dev.off() - } - ) - - #### Silhouette plot #### - plot_sil <- function() { - tempSil <- silhouette(as.integer(cl[,input$res]),dist=silDist) - par(mar=c(4,0,2,1),mgp=2:0) - if (length(tempSil) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Silhouette plot cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - plot(tempSil,beside=T,border=NA,main=NA,col=clustCols(),do.n.k=T) - } - } - - output$sil <- renderPlot({ - print(plot_sil()) - }) - - output$silSave <- downloadHandler( - filename="sil.pdf", - content=function(file) { - pdf(file,width=9,height=12) - print(plot_sil()) - dev.off() - } - ) - - #### res buttons #### - res <- eventReactive(input$go,input$res,ignoreNULL=F) - - observeEvent(input$save,{ - savedRes <<- input$res #<<- updates variable outside scope of function (ie. global environment) - save(savedRes,file=paste0(dataPath,dataTitle,"_savedRes.RData")) - }) - - - ######## Cell-type Clusters ######## - clusts <- reactive(cl[,res()]) - - #### Cell-type tSNE #### - plot_tsne_labels <- function() { - if (input$tsneLabels == "ca") { - temp_labelNames <- sapply(unique(clusterID[[res()]]),function(X) - names(which(clusterID[[res()]] == X)),simplify=F) - temp_labels <- apply(dr_viz,2,function(Y) - tapply(Y,apply(sapply(temp_labelNames,function(X) clusts() %in% X),1,which),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=names(temp_labelNames),font=2,cex=1.5) - } else if (input$tsneLabels == "cn") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=levels(clusts()),font=2,cex=1.5) - } else { - legend("center",legend="You changed the label choice names...") - } - } - - plot_tsne <- function() { - par(mar=c(4,3,3,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs"), - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(clustCols()[clusts()],0.2)[!ci()], - bg=alpha(clustCols()[clusts()],0.1)[!ci()]) - points(dr_viz[ci(),],pch=21, - col=alpha(clustCols()[clusts()],1)[ci()], - bg=alpha(clustCols()[clusts()],0.5)[ci()]) - } else { - points(dr_viz,pch=21, - col=alpha(clustCols()[clusts()],1), - bg=alpha(clustCols()[clusts()],0.5)) - } - if (hiC() != "") { - mtext(side=3,line=-1,text=paste("Cluster",hiC(),"-", - clusterID[[res()]][hiC()],"-", - sum(clusts() == hiC()),"cells")) - } - } - - output$tsne <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsne()) - print(plot_tsne_labels()) - } - }) - - output$tsneSave <- downloadHandler( - filename="tsne.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_tsne()) - dev.off() - } - ) - - #### clusterSelect #### - - - clusterSelect <- reactiveValues(cl=NULL) - - observeEvent(input$tsneClick,{ clusterSelect$cl <- input$tsneClick }) - - cSelected <- reactive({ - t <- nearPoints(as.data.frame(dr_viz),clusterSelect$cl,xvar="tSNE_1",yvar="tSNE_2",threshold=5) - t2 <- cl[rownames(t)[1],res()] - if (is.na(t2)) { return("") } else { return(t2) } - }) - - hiC <- reactive({ - if (length(res()) < 1) { - return("") - } else if (input$genePlotClust != "") { - cl[which(cl[,res()] == input$genePlotClust)[1],res()] - } else { - return(input$genePlotClust) - } - }) - - ci <- reactive({ - if (hiC() == "") { - rep(F,length(clusts())) - } else { - clusts() == hiC() - } - }) - - #### Metadata tSNE overlay #### - plot_tsneMD <- function() { - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - id <- as.factor(md[,input$tsneMDcol]) - if (length(levels(md[,input$tsneMDcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneMDcol])), - "Dark2")[1:length(levels(md[,input$tsneMDcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneMDcol]))) - } - } else { - id <- cut(md[,input$tsneMDcol],100) - idcol <- viridis(100,d=-1) - } - layout(cbind(2:1),heights=c(1,9)) - par(mar=c(3,3,0,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(idcol,.1)[id[!ci()]], - bg=alpha(idcol,0.05)[id[!ci()]]) - points(dr_viz[ci(),],pch=21, - col=alpha(idcol,.8)[id[ci()]], - bg=alpha(idcol,0.4)[id[ci()]]) - } else { - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - } - plot_tsne_labels() - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - par(mar=c(0,0,0,0)) - plot.new() - legend("bottom",bty="n",horiz=T,pch=c(NA,rep(21,length(levels(md[,input$tsneMDcol])))), - legend=c(paste0(input$tsneMDcol,":"),levels(md[,input$tsneMDcol])), - col=c(NA,idcol),pt.bg=c(NA,alpha(idcol,0.5))) - } else { - par(mar=c(0,5,3,3)) - barplot(rep(1,100),space=0,col=idcol,xaxt="n",yaxt="n",border=NA,main=input$tsneMDcol) - text(x=c(1,100),y=1,pos=c(2,4),xpd=NA,labels=round(range(md[,input$tsneMDcol]),2)) - } - } - - output$tsneMD <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsneMD()) - } - }) - - output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_tsneMD()) - dev.off() - } - ) - - #### Metadata Factor Barplot #### - plot_mdFactor <- function() { - id <- switch(input$mdFactorRA, - "relative"=tapply(md[,input$mdFactorData],clusts(), - function(X) table(X) / length(X)), - "absolute"=tapply(md[,input$mdFactorData],clusts(),table)) - if (is.list(id)) { id <- do.call(cbind,id) } - idylab <- switch(input$mdFactorRA, - "relative"="Proportion per cell type", - "absolute"="Counts per cell type") - if (length(levels(md[,input$mdFactorData])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$mdFactorData])), - "Dark2")[1:length(levels(md[,input$mdFactorData]))] - } else { - idcol <- rainbow2(length(levels(md[,input$mdFactorData]))) - } - par(mar=c(3,3,4,1),mgp=2:0) - barplot(id,col=idcol,ylab=idylab, - legend.text=levels(md[,input$mdFactorData]), - args.legend=list(x="topright",horiz=T,inset=c(0,-.08),bty="n")) - mtext(input$mdFactorData,side=3,adj=0,font=2,line=1,cex=1.2) - } - - output$mdFactor <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdFactor()) - } - }) - - output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_mdFactor()) - dev.off() - } - ) - - #### Metadata Scatterplot #### - plot_mdScatter <- function() { - layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5)) - par(mar=c(3,3,0,0),mgp=2:0,cex=1.1) - if (all(ci())) { - plot(md[,input$mdScatterX],md[,input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2), - xlab=input$mdScatterX,ylab=input$mdScatterY) - } else { - plot(md[!ci(),input$mdScatterX],md[!ci(),input$mdScatterY], - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab=input$mdScatterX,ylab=input$mdScatterY) - points(md[ci(),input$mdScatterX],md[ci(),input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2)) - } - if (any(ci())) { - legend("topleft",bty="n",pch=21,col="red",pt.bg=alpha("red",0.5), - legend=paste("Cluster",hiC(),"-",clusterID[[res()]][hiC()])) - } - par(mar=c(0,3,1,0)) - boxplot(tapply(md[,input$mdScatterX],ci(),c), - horizontal=T,xaxt="n",yaxt="n",border=c("black","red")) - par(mar=c(3,0,0,1)) - boxplot(tapply(md[,input$mdScatterY],ci(),c), - horizontal=F,xaxt="n",yaxt="n",border=c("black","red")) - } - - output$mdScatter <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdScatter()) - } - }) - - output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_mdScatter()) - dev.off() - } - ) - - - ######## Cluster-wise Gene Stats ######### - - #### Heatmap genes #### - output$DEgeneSlider <- renderUI({ - if (length(res()) > 0) { - switch(input$heatG, - deTissue= - sliderInput("DEgeneCount",min=2,max=max(sapply(deTissue[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression of cluster over tissue", - "# of genes per cluster to show",sep="
"))), - deMarker= - sliderInput("DEgeneCount",min=2,max=max(sapply(deMarker[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression between cluster and all other clusters", - "# of genes per cluster to show",sep="
"))), - deNeighb= - sliderInput("DEgeneCount",min=2,max=max(sapply(deNeighb[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression between cluster and nearest neighbour", - "# of genes per cluster to show",sep="
")))) - } - }) - - output$DEclustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("DEclustNum","Cluster # for gene list",choices=levels(clusts())) - } - }) - - heatGenes <- reactive({ - temp <- unique(unlist(lapply(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]]), - function(X) - if (nrow(X) == 0) { NA } else { rownames(X)[1:input$DEgeneCount] }))) - temp <- temp[!is.na(temp)] - return(temp) - }) - - clustMeans <- reactive({ #This only works if input is in ascending order of adjusted p value. - temp <- sapply(CGS[[res()]],function(X) X[heatGenes(),"MTC"]) - rownames(temp) <- heatGenes() - return(t(temp)) - }) - - hC <- reactive(hclust(dist(clustMeans()),"single")) - hG <- reactive(hclust(dist(t(clustMeans())),"complete")) - - sepClust <- reactive({ - if (hiC() == "") { - return(c(NA,NA)) - } else { - return(nrow(clustMeans()) - - c(which(levels(clusts())[hC()$order] == hiC()) - 1, - which(levels(clusts())[hC()$order] == hiC()))) - } - }) - - plot_heatmap <- function() { - if (length(levels(clusts())) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Heatmap cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - tempLabRow <- paste(paste0("Cluster ",levels(clusts())), - paste(sapply(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": ") - heatmap.2(clustMeans(),Rowv=as.dendrogram(hC()),Colv=as.dendrogram(hG()),scale="column", - col=viridis(100,d=-1),trace="none",margins=c(9,12),keysize=1,lhei=c(2,10),lwid=c(1,11), - cexCol=1 + 1/log2(nrow(clustMeans())),cexRow=1 + 1/log2(ncol(clustMeans())), - RowSideColors=clustCols(),labRow=tempLabRow,rowsep=sepClust()) - } - } - - output$heatmap <- renderPlot({ - if (length(res()) > 0) { - print(plot_heatmap()) - } - }) - - output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", - content=function(file) { - pdf(file,width=9,height=12) - print(plot_heatmap()) - dev.off() - } - ) - - output$deGeneSave <- downloadHandler( - filename=function() { paste0(input$heatG,"_",input$DEclustNum,".txt") }, - content=function(file) { - outTable <- switch(input$heatG, - deTissue=deTissue[[res()]][[input$DEclustNum]], - deMarker=deMarker[[res()]][[input$DEclustNum]], - deNeighb=deNeighb[[res()]][[input$DEclustNum]]) - write.table(outTable,file,quote=F,sep="\t",row.names=T,col.names=NA) - } - ) - - - #### clusterGenes #### - output$genePlotClustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("genePlotClust","Cluster:",choices=c("",levels(clusts())),selected=cSelected()) - } - }) - - cellMarkCols <- reactive(rainbow2(length(cellMarkers))) - - GOI <- eventReactive(input$GOIgo,grep(input$GOI,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - - plot_clusterGenes <- function() { - doubleDot <- function(col1,col2) { - upper.half.circle <- function(col1){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border=NA) - } - lower.half.circle <- function(col2){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0-cos(rs) - yc <- 0-sin(rs) - polygon(xc,yc,col=col2,border=NA) - } - upper.half.circle(col1) - lower.half.circle(col2) - rs <- seq(0,2*pi,len=200) - polygon(cos(rs),sin(rs),border="white") - } - singleDot <- function(col1){ - rs <- seq(0,2*pi,len=200) - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border="white") - } - par(mar=c(3,3,3,20),mgp=2:0) - if (hiC() == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above", - "to see gene expression for that cluster.",sep="\n")) - } else { - plot(MDTC~DR, - data=CGS[[res()]][[hiC()]][ - !((CGS[[res()]][[hiC()]]$cMu | CGS[[res()]][[hiC()]]$cMs) & CGS[[res()]][[hiC()]]$overCut),], - col=alpha("black",0.3), - xlab="Proportion of cells detecting gene", - ylab="Mean normalized gene expression of detected genes") - title(paste0("Cluster ", hiC(),": ",clusterID[[res()]][hiC()]),cex=1.2) - mtext(paste("Cells:",sum(clusts()==hiC()), - " Genes detected:",length(CGS[[res()]][[hiC()]]$DR)),side=3,line=0,cex=0.9) - box(col=clustCols()[hiC()],lwd=2) - - if (input$cgLegend == "markers") { - for (x in which(CGS[[res()]][[hiC()]]$cMu)) { - my.symbols(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x],symb=singleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[which(sapply(cellMarkersU,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))])) - } - for (x in which(CGS[[res()]][[hiC()]]$cMs)) { - temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))),"&")) - my.symbols(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x],symb=doubleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[as.integer(temp[1])], - col2=cellMarkCols()[as.integer(temp[2])])) - } - for (x in which(CGS[[res()]][[hiC()]]$cMu & CGS[[res()]][[hiC()]]$overCut)) { - text(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x], - labels=CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[which(sapply(cellMarkersU,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))]) - } - for (x in which(CGS[[res()]][[hiC()]]$cMs & CGS[[res()]][[hiC()]]$overCut)) { - text(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x], - labels=CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[as.integer(temp[2])]) - } - legend(x=1.05,y=max(CGS[[res()]][[hiC()]]$MDTC),xpd=NA,bty="n",ncol=1, - pch=19,col=cellMarkCols(),legend=names(cellMarkersU)) - - } else if (input$cgLegend == "heatmap") { - degl <- rownames(CGS[[res()]][[hiC()]]) %in% - rownames(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[hiC()]])[1:input$DEgeneCount] - if (any(degl)) { - points(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=CGS[[res()]][[hiC()]]$genes[degl]) - } - - } else if (input$cgLegend == "regex" & length(GOI()) > 0) { - degl <- which(rownames(nge) %in% GOI()) - points(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred",labels=CGS[[res()]][[hiC()]]$genes[degl]) - } - } - } - - output$clusterGenes <- renderPlot({ - if (length(res()) > 0) { - print(plot_clusterGenes()) - } - }) - - output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", - content=function(file) { - pdf(file,width=12,height=9) - print(plot_clusterGenes()) - dev.off() - } - ) - - #### Gene Stats Plot #### - cgGeneOpts <- reactive({ - t <- nearPoints(CGS[[res()]][[hiC()]],input$cgClick,xvar="DR",yvar="MDTC") - return(t$genes) - }) - - output$cgSelect <- renderUI({ - if (length(res()) > 0) { - if (input$boxplotGene == "click") { - selectInput("cgGene",label="Gene:",choices=sort(cgGeneOpts())) - } else if (input$boxplotGene == "regex") { - selectInput("cgGene",label="Gene:",choices=sort(GOI())) - } - } - }) - - plot_geneTest <- function() { - if (input$cgGene == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Select a gene by either clicking on the plot above", - "or entering regular expression capturing your gene symbol of interest", - "then pick the gene from the list just above this figure", - "to see a comparison of that gene's expression across all clusters.",sep="\n")) - } else { - temp_pos <- switch(as.character(length(levels(clusts())) > 1),"TRUE"=hC()$order,"FALSE"=1) - layout(matrix(2:1,nrow=2),heights=c(1,4)) - par(mar=c(3,3,0,3),mgp=2:0) - suppressWarnings(boxplot(vector("list",length(levels(clusts()))), - ylim=range(nge[input$cgGene,]), - ylab=paste(input$cgGene,"gene expression (log2)"), - xlab=NA,xaxt="n")) - mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) - mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(get(egDB),keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), - side=1,line=2,font=2) - } - if ("sct" %in% input$bxpOpts) { - bxpCol <- alpha(clustCols(),.2) - } else { - bxpCol <- alpha(clustCols(),.8) - } - for (i in temp_pos) { - boxplot(nge[input$cgGene,clusts() == levels(clusts())[i]], - col=bxpCol[i],at=which(temp_pos == i),add=T,notch=T,outline=F) - if ("sct" %in% input$bxpOpts) { - points(jitter(rep(which(temp_pos == i),sum(clusts() == levels(clusts())[i])),amount=.2), - nge[input$cgGene,clusts() == levels(clusts())[i]],pch=20,col=alpha(clustCols()[i],.4)) - } - } - if ("rnk" %in% input$bxpOpts) { - points(x=seq_along(CGS[[res()]]), - y=sapply(CGS[[res()]][temp_pos],function(X) X[input$cgGene,"MTCrank"]) * - max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - pch=25,cex=1.2,col="darkred",bg="firebrick2") - axis(side=4,at=seq(0,1,.25) * max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - labels=percent(seq(0,1,.25)),col.ticks="darkred",col.axis="darkred") - mtext(side=4,line=2,text="Quantile of gene expression per cluster",col="darkred") - } - if (length(temp_pos) > 1) { - par(new=F,mar=c(0,3,1,3)) - plot(as.dendrogram(hC()),leaflab="none") - } - } - } - - output$geneTest <- renderPlot({ - if (length(res()) > 0) { - print(plot_geneTest()) - } - }) - - output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", - content=function(file) { - pdf(file,width=12,height=9) - print(plot_geneTest()) - dev.off() - } - ) - - - ######## Distribution of genes of interest ######### - - GOI1 <- eventReactive(input$GOI1go,grep(input$GOI1,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - output$GOI1select <- renderUI({ - selectInput("goi1",label="Gene:",choices=sort(GOI1()),multiple=T) - }) - - GOI2 <- eventReactive(input$GOI2go,grep(input$GOI2,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - output$GOI2select <- renderUI({ - selectInput("goi2",label="Gene:",choices=sort(GOI2()),multiple=T) - }) - - plot_goi <- function(goi) { - if (length(goi) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("To search for your gene(s) of interest type a", - "search term (regex allowed) in the box above", - "then select the gene(s) from the drop-down list", - "in the \"Gene:\" box above right.",sep="\n")) - } else { - if (length(goi) > 5) { goiL <- 5 } else { goiL <- length(goi) } - if (goiL > 1) { - gv <- apply(nge[goi,],2,max) - } else { - gv <- nge[goi,] - } - cv <- cut(gv,breaks=100,labels=F) - par(mar=c(3,3,goiL+1,1),mgp=2:0) - plot(dr_viz,pch=21,cex=1.3,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(100,.7,d=-1)[cv],bg=viridis(100,.3,d=-1)[cv]) - temp_yrange <- max(dr_viz[,2]) - min(dr_viz[,2]) - segments(x0=seq(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.95),length.out=1000), - y0=max(dr_viz[,2]) + temp_yrange * .045, - y1=max(dr_viz[,2]) + temp_yrange * .065, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.75), - quantile(range(dr_viz[,1]),.95)), - y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), - labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- select(get(egDB),keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - if (length(tempGeneName) > 4) { tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] } - title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) - } - } - } - - output$goiPlot1 <- renderPlot({ - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", - content=function(file) { - pdf(file,width=10,height=10) - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - output$goiPlot2 <- renderPlot({ - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", - content=function(file) { - pdf(file,width=10,height=10) - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - -} - - -########## ShinyApp ########## -shinyApp(ui = ui, server = server) diff --git a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_deVS.RData b/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_deVS.RData deleted file mode 100644 index c61266b..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_deVS.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_forViz.RData b/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_forViz.RData deleted file mode 100644 index 6522999..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_forViz.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_savedRes.RData b/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_savedRes.RData deleted file mode 100644 index e69cdd2..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e15/e15_Cortical_Only_savedRes.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e15/intro.md b/ToBeConvertedToPkg/meCortex/e15/intro.md deleted file mode 100644 index a20a779..0000000 --- a/ToBeConvertedToPkg/meCortex/e15/intro.md +++ /dev/null @@ -1,2 +0,0 @@ -Welcome to the data portal for the 2017 Cell Reports paper [Developmental Emergence of Adult Neural Stem Cells as Revealed by Single-Cell Transcriptional Profiling](https://doi.org/10.1016/j.celrep.2017.12.017) by Yuzwa *et al.*, brought to you by [scClustViz](https://baderlab.github.io/scClustViz). These are embryonic day 15.5 cortically-derived cells. -Other datasets: [E11.5 Cerebral Cortex](https://innesbt.shinyapps.io/e11cortex/); [E13.5 Cerebral Cortex](https://innesbt.shinyapps.io/e13cortex/); [E17.5 Cerebral Cortex](https://innesbt.shinyapps.io/e17cortex/) diff --git a/ToBeConvertedToPkg/meCortex/e15/rsconnect/shinyapps.io/innesbt/e15cortex.dcf b/ToBeConvertedToPkg/meCortex/e15/rsconnect/shinyapps.io/innesbt/e15cortex.dcf deleted file mode 100644 index bbda2e2..0000000 --- a/ToBeConvertedToPkg/meCortex/e15/rsconnect/shinyapps.io/innesbt/e15cortex.dcf +++ /dev/null @@ -1,10 +0,0 @@ -name: e15cortex -title: -username: -account: innesbt -server: shinyapps.io -hostUrl: https://api.shinyapps.io/v1 -appId: 365579 -bundleId: 1444300 -url: https://innesbt.shinyapps.io/e15cortex/ -when: 1529722557.74518 diff --git a/ToBeConvertedToPkg/meCortex/e17/app.R b/ToBeConvertedToPkg/meCortex/e17/app.R deleted file mode 100644 index 241e89f..0000000 --- a/ToBeConvertedToPkg/meCortex/e17/app.R +++ /dev/null @@ -1,1036 +0,0 @@ -######## User-defined variables ######## - -dataPath <- "e17_Cortical_Only_forViz.RData" -## ^ Point this to the output file from PrepareInputs.R -## If you set a default resolution in the Shiny app, it will save to the same directory. - -vizScriptPath <- "./" -## ^ Point this to the directory in which the "app.R" Shiny script resides - -species <- "mouse" -## ^ Set species ("mouse"/"human"). -## If other, add the annotation database from Bioconductor to the egDB <- switch() expression below. - -#### List known cell-type markers #### -cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna","Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b","Tle4", - "Nes","Cux1","Cux2","Tubb3","Rbfox3","Dcx"), - "Oligodendrocyte precursors"=c("Cspg4","Olig2","Pdgfra"), - "Oligodendrocytes"=c("Mbp","Mog","Plp1","Mag"), - "Astrocytes"=c("Aldh1l1","Gfap","Slc1a3","Glul"), - "Microglia"="Cx3cr1") -#cellMarkers <- list() -## ^ If you have canonical marker genes for expected cell types, list them here -## (see example above from mouse embryonic cortex). The Shiny app will attempt -## to label clusters in the tSNE projection by highest median gene expression. -## Otherwise leave the list blank (uncomment line above). - -######################################## - - - -######## Code to run the Shiny app ######## -library(markdown) -library(shiny) -library(cluster) -library(gplots) -library(scales) -library(viridis) -library(RColorBrewer) -library(TeachingDemos) - -library(org.Mm.eg.db) -egDB <- "org.Mm.eg.db" - -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -if (length(cellMarkers) < 1) { - cellMarkersS <- cellMarkersU <- list() -} else { - cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2,function(X) do.call(intersect,unname(cellMarkers[X]))) - try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2,function(X) paste(X,collapse="&")),silent=T) - cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0] - cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)]) -} - -demoRegex <- switch(species,mouse="^Actb$",human="^ACTB$") - -load(dataPath) -temp_dataPath <- strsplit(dataPath,"/|\\\\") -dataPath <- sub(temp_dataPath[[1]][length(temp_dataPath[[1]])],"",dataPath) -if (dataPath == "") { dataPath <- "./" } -dataTitle <- sub("\\..+$|_forViz\\..+$","",temp_dataPath[[1]][length(temp_dataPath[[1]])]) -rm(temp_dataPath) - -if (file.exists(paste0(dataPath,dataTitle,"_savedRes.RData"))) { - load(paste0(dataPath,dataTitle,"_savedRes.RData")) -} else { - savedRes <- NULL -} - -if (!file.exists(paste0(dataPath,"intro.md"))) { - write(paste0(dataTitle,": You can add to this preamble by editting ",dataPath,"intro.md"), - file=paste0(dataPath,"intro.md")) -} - -silDist <- dist(dr_clust,method="euclidean") -## ^ precalculating distances in reduced dimensionality space for the silhouette plot. - -for (l in names(CGS)) { - for (i in names(CGS[[l]])) { - CGS[[l]][[i]]$MTCrank <- rank(CGS[[l]][[i]]$MTC,ties.method="min")/nrow(CGS[[l]][[i]]) - CGS[[l]][[i]]$cMu <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersU) - CGS[[l]][[i]]$cMs <- rownames(CGS[[l]][[i]]) %in% unlist(cellMarkersS) - CGS[[l]][[i]]$overCut <- CGS[[l]][[i]]$MTC > mean(CGS[[l]][[i]]$MTC) - CGS[[l]][[i]]$genes <- rownames(CGS[[l]][[i]]) - } -} - -if (length(cellMarkers) < 1) { - clusterID <- sapply(colnames(cl),function(X) rep("",nrow(cl)),simplify=F) -} else { - clusterID <- sapply(CGS,function(Z) { - temp <- names(cellMarkers)[sapply(Z,function(Y) - which.max(sapply(cellMarkers,function(X) median(Y$MTC[rownames(Y) %in% X]))))] - names(temp) <- names(Z) - return(temp) - },simplify=F) -} - -#### Run the Shiny App! #### - - -########## UI ########## -ui <- fixedPage( - fixedRow( - titlePanel(paste("scClustViz -",dataTitle)), - includeMarkdown(paste0(dataPath,"intro.md")) - ), - hr(), - - ######## Cluster Resolution Selection ######## - fixedRow(titlePanel("Cluster Resolution Selection"), - column(6, - fixedRow(column(6,uiOutput("resSelect"),align="left"), - column(6,align="right", - actionButton("go","View clusters at this resolution",icon("play")), - actionButton("save","Save this resolution as default",icon("bookmark")))), - radioButtons("deType",NULL,list("# of marker genes per cluster"="deMarker", - "# of DE genes to nearest neighbouring cluster"="deNeighb"),inline=T), - plotOutput("cqPlot",height="500px")), - column(6,plotOutput("sil",height="600px")) - ), - fixedRow( - column(6,downloadButton("cqPlotSave","Save as PDF"),align="left"), - column(6,downloadButton("silSave","Save as PDF"),align="right") - ), - hr(), - - ######## Cell-type Clusters ######## - fixedRow(titlePanel("Cell-type Clusters")), - fixedRow( - column(6, - if (length(cellMarkers) > 0) { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn","Cluster annotations"="ca")) - } else { - radioButtons("tsneLabels","Labels:",inline=T, - choices=list("Cluster numbers"="cn")) - }, - strong("Click point on plot below to select cluster")), - column(3,selectInput("mdScatterX","x axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_counts"),align="left"), - column(3,selectInput("mdScatterY","y axis:", - choices=colnames(md)[!sapply(md,function(X) is.factor(X) | is.character(X))], - selected="total_features"),align="left") - ), - fixedRow( - column(6,plotOutput("tsne",height="570px",click="tsneClick")), - column(6,plotOutput("mdScatter",height="570px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneSave","Save as PDF")), - column(6,align="right",downloadButton("mdScatterSave","Save as PDF")) - ), - hr(), - - fixedRow( - column(6,selectInput("tsneMDcol","Metadata:",choices=colnames(md), - selected=grep("phase",colnames(md),value=T,ignore.case=T)[1])), - column(3,selectInput("mdFactorData","Metadata (factor):", - choices=colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - selected=grep("phase", - colnames(md)[sapply(md,function(X) is.factor(X) | is.character(X))], - value=T,ignore.case=T)[1])), - column(3,radioButtons("mdFactorRA","Factor counts per cluster:",inline=T, - choices=list("Absolute"="absolute","Relative"="relative"))) - ), - fixedRow( - column(6,plotOutput("tsneMD",height="570px")), - column(6,plotOutput("mdFactor",height="570px")) - ), - fixedRow( - column(6,align="left",downloadButton("tsneMDSave","Save as PDF")), - column(6,align="right",downloadButton("mdFactorSave","Save as PDF")) - ), - hr(), - - ######## Cluster-wise Gene Stats ######### - fixedRow(titlePanel("Cluster-wise Gene Stats")), - fixedRow( - column(2,radioButtons("heatG","Heapmap Genes:", - choices=list("DE vs tissue average"="deTissue", - "Marker genes"="deMarker", - "DE vs neighbour"="deNeighb"))), - column(2,uiOutput("DEclustSelect")), - column(2,downloadButton("deGeneSave","Download gene list"), - downloadButton("heatmapSave","Save as PDF"),align="right"), - column(6,uiOutput("DEgeneSlider")) - ), - fixedRow(plotOutput("heatmap",height="600px")), - hr(), - - fixedRow( - column(1,uiOutput("genePlotClustSelect")), - column(6,if (length(cellMarkers) > 0) { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Cell-type markers"="markers", - "Gene symbols (regex)"="regex", - "Top DE genes (from heatmap)"="heatmap")) - } else { - radioButtons("cgLegend",inline=T,label="Highlighted genes:", - choices=c("Gene symbols (regex)"="regex", - "Top DE genes (from heatmap)"="heatmap")) - }), - column(4,align="right",textInput("GOI","Gene symbols (regex)",demoRegex)), - column(1,actionButton("GOIgo","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOIgo { margin-top: 25px; }"), - fixedRow( - plotOutput("clusterGenes",height="600px",click="cgClick"), - downloadButton("clusterGenesSave","Save as PDF") - ), - hr(), - - fixedRow( - column(4,uiOutput("cgSelect")), - column(8, - radioButtons("boxplotGene",inline=T,label="Gene of interest:", - choices=c("Click from plot above"="click", - "From gene symbols (regex entry)"="regex")), - checkboxGroupInput("bxpOpts",label=NULL,selected=c("sct","rnk"),inline=T, - choices=list("Include scatterplot"="sct", - "Include gene rank"="rnk"))) - ), - fixedRow(plotOutput("geneTest",height="500px"), - downloadButton("geneTestSave","Save as PDF") - ), - hr(), - - ######## Distribution of genes of interest ######### - fixedRow(titlePanel("Distribution of Genes of Interest")), - fixedRow( - column(4, - fixedRow( - radioButtons("plotClust1",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel1",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI1",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI1go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI1go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI1select")), - column(4, - fixedRow( - radioButtons("plotClust2",inline=T,label="Plot:",selected="goi", - choices=list("clusters"="clust","gene expression overlay"="goi")), - checkboxInput("plotLabel2",label="Include cluster labels",value=T) - ), - fixedRow( - column(9,textInput("GOI2",label="Gene symbols (regex):",demoRegex)), - column(3,actionButton("GOI2go","Search",icon=icon("search"))) - ),tags$style(type='text/css', "button#GOI2go { margin-top: 25px; margin-left: -25px; }") - ), - column(2,uiOutput("GOI2select")) - ), - fixedRow( - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")), - column(6,strong("If multiple genes are selected, the max expression per cell will be displayed")) - ), - fixedRow( - column(6,plotOutput("goiPlot1",height="600px")), - column(6,plotOutput("goiPlot2",height="600px")) - ), - fixedRow( - column(6,align="left",downloadButton("goiPlot1Save","Save as PDF")), - column(6,align="right",downloadButton("goiPlot2Save","Save as PDF")) - ), - h1() -) - - - -########## Server ########## -server <- function(input,output,session) { - clustCols <- reactive({ - if (length(levels(cl[,input$res])) <= 8) { - brewer.pal(length(levels(cl[,input$res])),"Dark2")[1:length(levels(cl[,input$res]))] - } else { - rainbow2(length(levels(cl[,input$res]))) - } - }) - - - ######## Cluster Resolution Selection ######## - #### Inter-cluster DE boxplots #### - numClust <- sapply(cl,function(X) length(levels(X))) - clustList <- as.list(colnames(cl)) - names(clustList) <- paste0(unlist(clustList),": ",numClust," clusters") - output$resSelect <- renderUI({ - selectInput("res","Resolution:",choices=clustList,selected=savedRes) - }) - numClust <- numClust[numClust > 1] - - plot_cqPlot <- function() { - numDEgenes <- lapply(get(input$deType),function(X) sapply(X,nrow)) - toplim <- c(21,max(unlist(numDEgenes)) + 20) - botlim <- c(-1,21) - - par(mar=c(0.2,3.5,1,1),mgp=2:0,mfrow=c(2,1)) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=toplim,yaxs="i",xaxt="n",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - - par(mar=c(3,3.5,0.2,1),mgp=2:0) - plot(x=numClust,y=sapply(numDEgenes,median),type="l", - xlim=range(numClust)+c(-.5,.5),ylim=botlim,yaxs="i",xlab="Number of clusters",ylab=NA) - abline(h=seq(0,max(unlist(numDEgenes)),10),lty=3,col=alpha(1,0.3)) - for (i in names(numDEgenes)[names(numDEgenes) != input$res]) { - boxplot(numDEgenes[[i]],add=T,at=numClust[i],yaxt="n") - } - if (any(names(numDEgenes) == input$res)) { - boxplot(numDEgenes[[input$res]],add=T,at=numClust[input$res],border="red") - } - mtext(switch(input$deType, - "deMarker"="Positive DE genes per cluster to all other clusters", - "deNeighb"="Positive DE genes per cluster to nearest cluster") - ,side=2,line=2.5,at=botlim[2],xpd=NA) - - } - - output$cqPlot <- renderPlot({ - print(plot_cqPlot()) - }) - - output$cqPlotSave <- downloadHandler( - filename="cqPlot.pdf", - content=function(file) { - pdf(file,width=6,height=5) - print(plot_cqPlot()) - dev.off() - } - ) - - #### Silhouette plot #### - plot_sil <- function() { - tempSil <- silhouette(as.integer(cl[,input$res]),dist=silDist) - par(mar=c(4,0,2,1),mgp=2:0) - if (length(tempSil) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Silhouette plot cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - plot(tempSil,beside=T,border=NA,main=NA,col=clustCols(),do.n.k=T) - } - } - - output$sil <- renderPlot({ - print(plot_sil()) - }) - - output$silSave <- downloadHandler( - filename="sil.pdf", - content=function(file) { - pdf(file,width=9,height=12) - print(plot_sil()) - dev.off() - } - ) - - #### res buttons #### - res <- eventReactive(input$go,input$res,ignoreNULL=F) - - observeEvent(input$save,{ - savedRes <<- input$res #<<- updates variable outside scope of function (ie. global environment) - save(savedRes,file=paste0(dataPath,dataTitle,"_savedRes.RData")) - }) - - - ######## Cell-type Clusters ######## - clusts <- reactive(cl[,res()]) - - #### Cell-type tSNE #### - plot_tsne_labels <- function() { - if (input$tsneLabels == "ca") { - temp_labelNames <- sapply(unique(clusterID[[res()]]),function(X) - names(which(clusterID[[res()]] == X)),simplify=F) - temp_labels <- apply(dr_viz,2,function(Y) - tapply(Y,apply(sapply(temp_labelNames,function(X) clusts() %in% X),1,which),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=names(temp_labelNames),font=2,cex=1.5) - } else if (input$tsneLabels == "cn") { - temp_labels <- apply(dr_viz,2,function(X) tapply(X,clusts(),mean)) - if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) } - text(temp_labels,labels=levels(clusts()),font=2,cex=1.5) - } else { - legend("center",legend="You changed the label choice names...") - } - } - - plot_tsne <- function() { - par(mar=c(4,3,3,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - main=paste("tSNE at",res(),"using",ncol(dr_clust),"PCs"), - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(clustCols()[clusts()],0.2)[!ci()], - bg=alpha(clustCols()[clusts()],0.1)[!ci()]) - points(dr_viz[ci(),],pch=21, - col=alpha(clustCols()[clusts()],1)[ci()], - bg=alpha(clustCols()[clusts()],0.5)[ci()]) - } else { - points(dr_viz,pch=21, - col=alpha(clustCols()[clusts()],1), - bg=alpha(clustCols()[clusts()],0.5)) - } - if (hiC() != "") { - mtext(side=3,line=-1,text=paste("Cluster",hiC(),"-", - clusterID[[res()]][hiC()],"-", - sum(clusts() == hiC()),"cells")) - } - } - - output$tsne <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsne()) - print(plot_tsne_labels()) - } - }) - - output$tsneSave <- downloadHandler( - filename="tsne.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_tsne()) - dev.off() - } - ) - - #### clusterSelect #### - - - clusterSelect <- reactiveValues(cl=NULL) - - observeEvent(input$tsneClick,{ clusterSelect$cl <- input$tsneClick }) - - cSelected <- reactive({ - t <- nearPoints(as.data.frame(dr_viz),clusterSelect$cl,xvar="tSNE_1",yvar="tSNE_2",threshold=5) - t2 <- cl[rownames(t)[1],res()] - if (is.na(t2)) { return("") } else { return(t2) } - }) - - hiC <- reactive({ - if (length(res()) < 1) { - return("") - } else if (input$genePlotClust != "") { - cl[which(cl[,res()] == input$genePlotClust)[1],res()] - } else { - return(input$genePlotClust) - } - }) - - ci <- reactive({ - if (hiC() == "") { - rep(F,length(clusts())) - } else { - clusts() == hiC() - } - }) - - #### Metadata tSNE overlay #### - plot_tsneMD <- function() { - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - id <- as.factor(md[,input$tsneMDcol]) - if (length(levels(md[,input$tsneMDcol])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$tsneMDcol])), - "Dark2")[1:length(levels(md[,input$tsneMDcol]))] - } else { - idcol <- rainbow2(length(levels(md[,input$tsneMDcol]))) - } - } else { - id <- cut(md[,input$tsneMDcol],100) - idcol <- viridis(100,d=-1) - } - layout(cbind(2:1),heights=c(1,9)) - par(mar=c(3,3,0,1),mgp=2:0) - plot(x=NULL,y=NULL,xlab="tSNE_1",ylab="tSNE_2", - xlim=range(dr_viz[,1]),ylim=range(dr_viz[,2])) - if (any(ci())) { - points(dr_viz[!ci(),],pch=21, - col=alpha(idcol,.1)[id[!ci()]], - bg=alpha(idcol,0.05)[id[!ci()]]) - points(dr_viz[ci(),],pch=21, - col=alpha(idcol,.8)[id[ci()]], - bg=alpha(idcol,0.4)[id[ci()]]) - } else { - points(dr_viz,pch=21, - col=alpha(idcol,.8)[id], - bg=alpha(idcol,0.4)[id]) - } - plot_tsne_labels() - if (is.factor(md[,input$tsneMDcol]) | is.character(md[,input$tsneMDcol])) { - par(mar=c(0,0,0,0)) - plot.new() - legend("bottom",bty="n",horiz=T,pch=c(NA,rep(21,length(levels(md[,input$tsneMDcol])))), - legend=c(paste0(input$tsneMDcol,":"),levels(md[,input$tsneMDcol])), - col=c(NA,idcol),pt.bg=c(NA,alpha(idcol,0.5))) - } else { - par(mar=c(0,5,3,3)) - barplot(rep(1,100),space=0,col=idcol,xaxt="n",yaxt="n",border=NA,main=input$tsneMDcol) - text(x=c(1,100),y=1,pos=c(2,4),xpd=NA,labels=round(range(md[,input$tsneMDcol]),2)) - } - } - - output$tsneMD <- renderPlot({ - if (length(res()) > 0) { - print(plot_tsneMD()) - } - }) - - output$tsneMDSave <- downloadHandler( - filename="tsneMD.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_tsneMD()) - dev.off() - } - ) - - #### Metadata Factor Barplot #### - plot_mdFactor <- function() { - id <- switch(input$mdFactorRA, - "relative"=tapply(md[,input$mdFactorData],clusts(), - function(X) table(X) / length(X)), - "absolute"=tapply(md[,input$mdFactorData],clusts(),table)) - if (is.list(id)) { id <- do.call(cbind,id) } - idylab <- switch(input$mdFactorRA, - "relative"="Proportion per cell type", - "absolute"="Counts per cell type") - if (length(levels(md[,input$mdFactorData])) <= 8) { - idcol <- brewer.pal(length(levels(md[,input$mdFactorData])), - "Dark2")[1:length(levels(md[,input$mdFactorData]))] - } else { - idcol <- rainbow2(length(levels(md[,input$mdFactorData]))) - } - par(mar=c(3,3,4,1),mgp=2:0) - barplot(id,col=idcol,ylab=idylab, - legend.text=levels(md[,input$mdFactorData]), - args.legend=list(x="topright",horiz=T,inset=c(0,-.08),bty="n")) - mtext(input$mdFactorData,side=3,adj=0,font=2,line=1,cex=1.2) - } - - output$mdFactor <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdFactor()) - } - }) - - output$mdFactorSave <- downloadHandler( - filename="mdFactor.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_mdFactor()) - dev.off() - } - ) - - #### Metadata Scatterplot #### - plot_mdScatter <- function() { - layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5)) - par(mar=c(3,3,0,0),mgp=2:0,cex=1.1) - if (all(ci())) { - plot(md[,input$mdScatterX],md[,input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2), - xlab=input$mdScatterX,ylab=input$mdScatterY) - } else { - plot(md[!ci(),input$mdScatterX],md[!ci(),input$mdScatterY], - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab=input$mdScatterX,ylab=input$mdScatterY) - points(md[ci(),input$mdScatterX],md[ci(),input$mdScatterY], - pch=21,col=alpha("red",0.4),bg=alpha("red",0.2)) - } - if (any(ci())) { - legend("topleft",bty="n",pch=21,col="red",pt.bg=alpha("red",0.5), - legend=paste("Cluster",hiC(),"-",clusterID[[res()]][hiC()])) - } - par(mar=c(0,3,1,0)) - boxplot(tapply(md[,input$mdScatterX],ci(),c), - horizontal=T,xaxt="n",yaxt="n",border=c("black","red")) - par(mar=c(3,0,0,1)) - boxplot(tapply(md[,input$mdScatterY],ci(),c), - horizontal=F,xaxt="n",yaxt="n",border=c("black","red")) - } - - output$mdScatter <- renderPlot({ - if (length(res()) > 0) { - print(plot_mdScatter()) - } - }) - - output$mdScatterSave <- downloadHandler( - filename="mdScatter.pdf", - content=function(file) { - pdf(file,width=10,height=10) - print(plot_mdScatter()) - dev.off() - } - ) - - - ######## Cluster-wise Gene Stats ######### - - #### Heatmap genes #### - output$DEgeneSlider <- renderUI({ - if (length(res()) > 0) { - switch(input$heatG, - deTissue= - sliderInput("DEgeneCount",min=2,max=max(sapply(deTissue[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression of cluster over tissue", - "# of genes per cluster to show",sep="
"))), - deMarker= - sliderInput("DEgeneCount",min=2,max=max(sapply(deMarker[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression between cluster and all other clusters", - "# of genes per cluster to show",sep="
"))), - deNeighb= - sliderInput("DEgeneCount",min=2,max=max(sapply(deNeighb[[res()]],nrow)), - value=5,step=1,ticks=T,width="100%", - label=HTML(paste("Positive differential gene expression between cluster and nearest neighbour", - "# of genes per cluster to show",sep="
")))) - } - }) - - output$DEclustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("DEclustNum","Cluster # for gene list",choices=levels(clusts())) - } - }) - - heatGenes <- reactive({ - temp <- unique(unlist(lapply(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]]), - function(X) - if (nrow(X) == 0) { NA } else { rownames(X)[1:input$DEgeneCount] }))) - temp <- temp[!is.na(temp)] - return(temp) - }) - - clustMeans <- reactive({ #This only works if input is in ascending order of adjusted p value. - temp <- sapply(CGS[[res()]],function(X) X[heatGenes(),"MTC"]) - rownames(temp) <- heatGenes() - return(t(temp)) - }) - - hC <- reactive(hclust(dist(clustMeans()),"single")) - hG <- reactive(hclust(dist(t(clustMeans())),"complete")) - - sepClust <- reactive({ - if (hiC() == "") { - return(c(NA,NA)) - } else { - return(nrow(clustMeans()) - - c(which(levels(clusts())[hC()$order] == hiC()) - 1, - which(levels(clusts())[hC()$order] == hiC()))) - } - }) - - plot_heatmap <- function() { - if (length(levels(clusts())) <= 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Heatmap cannot be computed", - "with less than two clusters.",sep="\n")) - } else { - tempLabRow <- paste(paste0("Cluster ",levels(clusts())), - paste(sapply(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]]),nrow),"DE"), - sep=": ") - heatmap.2(clustMeans(),Rowv=as.dendrogram(hC()),Colv=as.dendrogram(hG()),scale="column", - col=viridis(100,d=-1),trace="none",margins=c(9,12),keysize=1,lhei=c(2,10),lwid=c(1,11), - cexCol=1 + 1/log2(nrow(clustMeans())),cexRow=1 + 1/log2(ncol(clustMeans())), - RowSideColors=clustCols(),labRow=tempLabRow,rowsep=sepClust()) - } - } - - output$heatmap <- renderPlot({ - if (length(res()) > 0) { - print(plot_heatmap()) - } - }) - - output$heatmapSave <- downloadHandler( - filename="heatmap.pdf", - content=function(file) { - pdf(file,width=9,height=12) - print(plot_heatmap()) - dev.off() - } - ) - - output$deGeneSave <- downloadHandler( - filename=function() { paste0(input$heatG,"_",input$DEclustNum,".txt") }, - content=function(file) { - outTable <- switch(input$heatG, - deTissue=deTissue[[res()]][[input$DEclustNum]], - deMarker=deMarker[[res()]][[input$DEclustNum]], - deNeighb=deNeighb[[res()]][[input$DEclustNum]]) - write.table(outTable,file,quote=F,sep="\t",row.names=T,col.names=NA) - } - ) - - - #### clusterGenes #### - output$genePlotClustSelect <- renderUI({ - if (length(res()) > 0) { - selectInput("genePlotClust","Cluster:",choices=c("",levels(clusts())),selected=cSelected()) - } - }) - - cellMarkCols <- reactive(rainbow2(length(cellMarkers))) - - GOI <- eventReactive(input$GOIgo,grep(input$GOI,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - - plot_clusterGenes <- function() { - doubleDot <- function(col1,col2) { - upper.half.circle <- function(col1){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border=NA) - } - lower.half.circle <- function(col2){ - rs <- seq(0,pi,len=100) + pi/2 - xc <- 0-cos(rs) - yc <- 0-sin(rs) - polygon(xc,yc,col=col2,border=NA) - } - upper.half.circle(col1) - lower.half.circle(col2) - rs <- seq(0,2*pi,len=200) - polygon(cos(rs),sin(rs),border="white") - } - singleDot <- function(col1){ - rs <- seq(0,2*pi,len=200) - xc <- 0+cos(rs) - yc <- 0+sin(rs) - polygon(xc,yc,col=col1,border="white") - } - par(mar=c(3,3,3,20),mgp=2:0) - if (hiC() == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above", - "to see gene expression for that cluster.",sep="\n")) - } else { - plot(MDTC~DR, - data=CGS[[res()]][[hiC()]][ - !((CGS[[res()]][[hiC()]]$cMu | CGS[[res()]][[hiC()]]$cMs) & CGS[[res()]][[hiC()]]$overCut),], - col=alpha("black",0.3), - xlab="Proportion of cells detecting gene", - ylab="Mean normalized gene expression of detected genes") - title(paste0("Cluster ", hiC(),": ",clusterID[[res()]][hiC()]),cex=1.2) - mtext(paste("Cells:",sum(clusts()==hiC()), - " Genes detected:",length(CGS[[res()]][[hiC()]]$DR)),side=3,line=0,cex=0.9) - box(col=clustCols()[hiC()],lwd=2) - - if (input$cgLegend == "markers") { - for (x in which(CGS[[res()]][[hiC()]]$cMu)) { - my.symbols(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x],symb=singleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[which(sapply(cellMarkersU,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))])) - } - for (x in which(CGS[[res()]][[hiC()]]$cMs)) { - temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))),"&")) - my.symbols(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x],symb=doubleDot,inches=0.1, - MoreArgs=list(col1=cellMarkCols()[as.integer(temp[1])], - col2=cellMarkCols()[as.integer(temp[2])])) - } - for (x in which(CGS[[res()]][[hiC()]]$cMu & CGS[[res()]][[hiC()]]$overCut)) { - text(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x], - labels=CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[which(sapply(cellMarkersU,function(X) - CGS[[res()]][[hiC()]]$genes[x] %in% X))]) - } - for (x in which(CGS[[res()]][[hiC()]]$cMs & CGS[[res()]][[hiC()]]$overCut)) { - text(x=CGS[[res()]][[hiC()]]$DR[x],y=CGS[[res()]][[hiC()]]$MDTC[x], - labels=CGS[[res()]][[hiC()]]$genes[x],srt=315,cex=1.5,font=2,adj=c(1.1,-.1), - col=cellMarkCols()[as.integer(temp[2])]) - } - legend(x=1.05,y=max(CGS[[res()]][[hiC()]]$MDTC),xpd=NA,bty="n",ncol=1, - pch=19,col=cellMarkCols(),legend=names(cellMarkersU)) - - } else if (input$cgLegend == "heatmap") { - degl <- rownames(CGS[[res()]][[hiC()]]) %in% - rownames(switch(input$heatG, - deTissue=deTissue[[res()]], - deMarker=deMarker[[res()]], - deNeighb=deNeighb[[res()]])[[hiC()]])[1:input$DEgeneCount] - if (any(degl)) { - points(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred", - labels=CGS[[res()]][[hiC()]]$genes[degl]) - } - - } else if (input$cgLegend == "regex" & length(GOI()) > 0) { - degl <- which(rownames(nge) %in% GOI()) - points(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - pch=16,cex=1.2,col="darkred") - text(x=CGS[[res()]][[hiC()]]$DR[degl],y=CGS[[res()]][[hiC()]]$MDTC[degl], - srt=315,cex=1.5,font=2,adj=c(1.1,-.1),col="darkred",labels=CGS[[res()]][[hiC()]]$genes[degl]) - } - } - } - - output$clusterGenes <- renderPlot({ - if (length(res()) > 0) { - print(plot_clusterGenes()) - } - }) - - output$clusterGenesSave <- downloadHandler( - filename="clusterGenes.pdf", - content=function(file) { - pdf(file,width=12,height=9) - print(plot_clusterGenes()) - dev.off() - } - ) - - #### Gene Stats Plot #### - cgGeneOpts <- reactive({ - t <- nearPoints(CGS[[res()]][[hiC()]],input$cgClick,xvar="DR",yvar="MDTC") - return(t$genes) - }) - - output$cgSelect <- renderUI({ - if (length(res()) > 0) { - if (input$boxplotGene == "click") { - selectInput("cgGene",label="Gene:",choices=sort(cgGeneOpts())) - } else if (input$boxplotGene == "regex") { - selectInput("cgGene",label="Gene:",choices=sort(GOI())) - } - } - }) - - plot_geneTest <- function() { - if (input$cgGene == "") { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("Select a gene by either clicking on the plot above", - "or entering regular expression capturing your gene symbol of interest", - "then pick the gene from the list just above this figure", - "to see a comparison of that gene's expression across all clusters.",sep="\n")) - } else { - temp_pos <- switch(as.character(length(levels(clusts())) > 1),"TRUE"=hC()$order,"FALSE"=1) - layout(matrix(2:1,nrow=2),heights=c(1,4)) - par(mar=c(3,3,0,3),mgp=2:0) - suppressWarnings(boxplot(vector("list",length(levels(clusts()))), - ylim=range(nge[input$cgGene,]), - ylab=paste(input$cgGene,"gene expression (log2)"), - xlab=NA,xaxt="n")) - mtext(levels(clusts())[temp_pos],side=1,line=0,at=seq_along(temp_pos)) - mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1) - try(tempGeneName <- select(get(egDB),keys=input$cgGene, - keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - mtext(paste(paste("Gene name:",tempGeneName),collapse="\n"), - side=1,line=2,font=2) - } - if ("sct" %in% input$bxpOpts) { - bxpCol <- alpha(clustCols(),.2) - } else { - bxpCol <- alpha(clustCols(),.8) - } - for (i in temp_pos) { - boxplot(nge[input$cgGene,clusts() == levels(clusts())[i]], - col=bxpCol[i],at=which(temp_pos == i),add=T,notch=T,outline=F) - if ("sct" %in% input$bxpOpts) { - points(jitter(rep(which(temp_pos == i),sum(clusts() == levels(clusts())[i])),amount=.2), - nge[input$cgGene,clusts() == levels(clusts())[i]],pch=20,col=alpha(clustCols()[i],.4)) - } - } - if ("rnk" %in% input$bxpOpts) { - points(x=seq_along(CGS[[res()]]), - y=sapply(CGS[[res()]][temp_pos],function(X) X[input$cgGene,"MTCrank"]) * - max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - pch=25,cex=1.2,col="darkred",bg="firebrick2") - axis(side=4,at=seq(0,1,.25) * max(nge[input$cgGene,]) + min(nge[input$cgGene,]), - labels=percent(seq(0,1,.25)),col.ticks="darkred",col.axis="darkred") - mtext(side=4,line=2,text="Quantile of gene expression per cluster",col="darkred") - } - if (length(temp_pos) > 1) { - par(new=F,mar=c(0,3,1,3)) - plot(as.dendrogram(hC()),leaflab="none") - } - } - } - - output$geneTest <- renderPlot({ - if (length(res()) > 0) { - print(plot_geneTest()) - } - }) - - output$geneTestSave <- downloadHandler( - filename="geneTest.pdf", - content=function(file) { - pdf(file,width=12,height=9) - print(plot_geneTest()) - dev.off() - } - ) - - - ######## Distribution of genes of interest ######### - - GOI1 <- eventReactive(input$GOI1go,grep(input$GOI1,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - output$GOI1select <- renderUI({ - selectInput("goi1",label="Gene:",choices=sort(GOI1()),multiple=T) - }) - - GOI2 <- eventReactive(input$GOI2go,grep(input$GOI2,rownames(nge),value=T,ignore.case=T),ignoreNULL=F) - output$GOI2select <- renderUI({ - selectInput("goi2",label="Gene:",choices=sort(GOI2()),multiple=T) - }) - - plot_goi <- function(goi) { - if (length(goi) < 1) { - plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA) - text(.5,.5,paste("To search for your gene(s) of interest type a", - "search term (regex allowed) in the box above", - "then select the gene(s) from the drop-down list", - "in the \"Gene:\" box above right.",sep="\n")) - } else { - if (length(goi) > 5) { goiL <- 5 } else { goiL <- length(goi) } - if (goiL > 1) { - gv <- apply(nge[goi,],2,max) - } else { - gv <- nge[goi,] - } - cv <- cut(gv,breaks=100,labels=F) - par(mar=c(3,3,goiL+1,1),mgp=2:0) - plot(dr_viz,pch=21,cex=1.3,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(100,.7,d=-1)[cv],bg=viridis(100,.3,d=-1)[cv]) - temp_yrange <- max(dr_viz[,2]) - min(dr_viz[,2]) - segments(x0=seq(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.95),length.out=1000), - y0=max(dr_viz[,2]) + temp_yrange * .045, - y1=max(dr_viz[,2]) + temp_yrange * .065, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(dr_viz[,1]),.55), - quantile(range(dr_viz[,1]),.75), - quantile(range(dr_viz[,1]),.95)), - y=rep(max(dr_viz[,2]) + temp_yrange * .06,3), - labels=c(round(min(gv),2),"Max expression per cell",round(max(gv),2)),pos=2:4,xpd=NA) - try(tempGeneName <- select(get(egDB),keys=goi,keytype="SYMBOL",column="GENENAME")$GENENAME,silent=T) - if (exists("tempGeneName")) { - if (length(tempGeneName) > 4) { tempGeneName[5] <- "and more..."; tempGeneName <- tempGeneName[1:5] } - title(paste(tempGeneName,collapse="\n"),line=0.25,adj=.01,font.main=1) - } - } - } - - output$goiPlot1 <- renderPlot({ - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot1Save <- downloadHandler( - filename="goi1.pdf", - content=function(file) { - pdf(file,width=10,height=10) - if (input$plotClust1 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel1) { print(plot_tsne_labels()) } - } else if (input$plotClust1 == "goi") { - print(plot_goi(input$goi1)) - if (input$plotLabel1 & length(res()) > 0 & length(input$goi1) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - - output$goiPlot2 <- renderPlot({ - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - }) - - output$goiPlot2Save <- downloadHandler( - filename="goi2.pdf", - content=function(file) { - pdf(file,width=10,height=10) - if (input$plotClust2 == "clust" & length(res()) > 0) { - print(plot_tsne()) - if (input$plotLabel2) { print(plot_tsne_labels()) } - } else if (input$plotClust2 == "goi") { - print(plot_goi(input$goi2)) - if (input$plotLabel2 & length(res()) > 0 & length(input$goi2) > 0) { - print(plot_tsne_labels()) - } - } - dev.off() - } - ) - -} - - -########## ShinyApp ########## -shinyApp(ui = ui, server = server) diff --git a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_deVS.RData b/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_deVS.RData deleted file mode 100644 index 3a12bb4..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_deVS.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_forViz.RData b/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_forViz.RData deleted file mode 100644 index b4a3fbe..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_forViz.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_savedRes.RData b/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_savedRes.RData deleted file mode 100644 index 3f535ee..0000000 Binary files a/ToBeConvertedToPkg/meCortex/e17/e17_Cortical_Only_savedRes.RData and /dev/null differ diff --git a/ToBeConvertedToPkg/meCortex/e17/intro.md b/ToBeConvertedToPkg/meCortex/e17/intro.md deleted file mode 100644 index 73708d8..0000000 --- a/ToBeConvertedToPkg/meCortex/e17/intro.md +++ /dev/null @@ -1,2 +0,0 @@ -Welcome to the data portal for the 2017 Cell Reports paper [Developmental Emergence of Adult Neural Stem Cells as Revealed by Single-Cell Transcriptional Profiling](https://doi.org/10.1016/j.celrep.2017.12.017) by Yuzwa *et al.*, brought to you by [scClustViz](https://baderlab.github.io/scClustViz). These are embryonic day 17.5 cortically-derived cells. -Other datasets: [E11.5 Cerebral Cortex](https://innesbt.shinyapps.io/e11cortex/); [E13.5 Cerebral Cortex](https://innesbt.shinyapps.io/e13cortex/); [E15.5 Cerebral Cortex](https://innesbt.shinyapps.io/e15cortex/) diff --git a/ToBeConvertedToPkg/meCortex/e17/rsconnect/shinyapps.io/innesbt/e17cortex.dcf b/ToBeConvertedToPkg/meCortex/e17/rsconnect/shinyapps.io/innesbt/e17cortex.dcf deleted file mode 100644 index 8144ab5..0000000 --- a/ToBeConvertedToPkg/meCortex/e17/rsconnect/shinyapps.io/innesbt/e17cortex.dcf +++ /dev/null @@ -1,10 +0,0 @@ -name: e17cortex -title: -username: -account: innesbt -server: shinyapps.io -hostUrl: https://api.shinyapps.io/v1 -appId: 365580 -bundleId: 1444310 -url: https://innesbt.shinyapps.io/e17cortex/ -when: 1529722972.84331 diff --git a/ToBeConvertedToPkg/meCortex/exampleSHINYAPPSIOupload.R b/ToBeConvertedToPkg/meCortex/exampleSHINYAPPSIOupload.R deleted file mode 100644 index 4b5baa4..0000000 --- a/ToBeConvertedToPkg/meCortex/exampleSHINYAPPSIOupload.R +++ /dev/null @@ -1,12 +0,0 @@ -# Note: Change egDB code from main RunVizScript to mouse only version from e1*/app.R - -tp <- "e13" - -library(rsconnect) -options(repos = BiocInstaller::biocinstallRepos()) -setwd(paste0("meCortex/",tp)) - -deployApp(appName=paste0(tp,"cortex"), - appFiles=c("app.R","intro.md", - paste0(tp,"_Cortical_Only_forViz.RData"), - paste0(tp,"_Cortical_Only_savedRes.RData"))) diff --git a/ToBeConvertedToPkg/paper/Fig1.R b/ToBeConvertedToPkg/paper/Fig1.R deleted file mode 100644 index 98404ee..0000000 --- a/ToBeConvertedToPkg/paper/Fig1.R +++ /dev/null @@ -1,55 +0,0 @@ -## See https://baderlab.github.io/scRNAseq_meCortex/PipelineV2/pseudocountTest -library(scales) -library(RColorBrewer) - -mean.logX <- function(data,ex=2,pc=1,pc.out) { log(mean(ex^data - pc) + pc.out,base=ex) } -halfway <- 10 - -testData <- t(sapply(seq(0,.099,.001),function(prob) rbinom(1e3,100,prob))) -GER <- rowMeans(testData)[halfway] / rowMeans(testData) -testMeans <- rowMeans(testData) -logTestData <- log2(testData + 1) - - -pseudocount.use <- c(1,1e-99,1/nrow(testData)) -testLogMeans <- sapply(pseudocount.use,function(X) apply(logTestData,MARGIN=1,FUN=mean.logX,pc.out=X)) -logGER <- apply(testLogMeans,2,function(X) X[halfway] - X) - -pdf(file="../Reports/18_F1000Res_scClustViz/Fig1a.pdf",height=6,width=6) -layout(matrix(c(3,1,4,2),2),widths=c(6,1),heights=c(1,6)) -par(mar=c(3,3,.1,.1),mgp=2:0) -plot(x=NULL,y=NULL,xlim=range(log2(GER)[-1]),ylim=c(min(logGER),max(logGER[,-2])), - xlab="Log2(Gene Expression Ratio)",ylab="logGER with pseudocount") -for (i in 1:ncol(logGER)) { - points(log2(GER)[-1],logGER[-1,i],pch=19,col=alpha(brewer.pal(3,"Dark2")[i],.5)) -} -abline(0,1) -legend("topleft",bty="n",pch=19,col=alpha(brewer.pal(3,"Dark2"),.5), - legend=c("1","1e-99","1/#cells"),title="Pseudocount") -par(mar=c(3,.1,.1,1)) -plot(x=c(1,1),y=logGER[1,-2],ylim=c(min(logGER),max(logGER[,-2])), - xaxt="n",yaxt="n",xlab=NA, - pch=19,col=alpha(brewer.pal(3,"Dark2"),.5)[c(1,3)]) -axis(1,1,expression(infinity)) -par(mar=c(.1,3,1,.1)) -plot(x=1,y=logGER[1,2],type="n",xaxt="n",ylab=NA) -par(mar=c(.1,.1,1,1)) -plot(x=1,y=logGER[1,2],xaxt="n",yaxt="n", - pch=19,col=alpha(brewer.pal(3,"Dark2"),.5)[2]) -dev.off() - - -pdf(file="../Reports/18_F1000Res_scClustViz/Fig1b.pdf",height=3,width=6) -layout(rbind(1:2),widths=c(6,1)) -par(mar=c(3,3,1,.1),mgp=2:0) -plot(x=NULL,y=NULL,xlim=c(min(logGER),max(logGER[,-2])),ylim=c(.5,3.5), - yaxt="n",ylab="Pseudocount",xlab="logGER with pseudocount") -axis(2,1:3,c("1","1e-99","1/#cells")) -for (i in 1:ncol(logGER)) { - if (i == 2) { drop2 <- -1 } else { drop2 <- 1:nrow(logGER) } - boxplot(logGER[drop2,i],col=alpha(brewer.pal(3,"Dark2")[i],.5), - add=T,at=i,horizontal=T,xaxt="n") -} -par(mar=c(3,.1,1,1)) -plot(x=logGER[1,2],y=2,ylim=c(.5,3.5),yaxt="n",xlab=NA) -dev.off() diff --git a/ToBeConvertedToPkg/paper/Fig2.R b/ToBeConvertedToPkg/paper/Fig2.R deleted file mode 100644 index 2ca400c..0000000 --- a/ToBeConvertedToPkg/paper/Fig2.R +++ /dev/null @@ -1,194 +0,0 @@ -library(pbapply) -library(RColorBrewer) -#### Data setup from PrepareInputs.R #### -mean.logX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -exponent <- 2 -pseudocount <- 1 - -load("meCortex/e13/e13_Cortical_Only_forViz.RData") -rm(CGS,deTissue,deMarker,deNeighb) - -res <- "res.0.8" -CGS <- list() - -DR <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) sum(Y>0)/length(Y))) -MDTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) -})) -MTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],mean.logX)) -CGS[[res]] <- sapply(levels(cl[,res]),function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - -combos <- combn(levels(cl[,res]),2) -colnames(combos) <- apply(combos,2,function(X) paste(X,collapse="-")) -deM_dDR <- apply(combos,2,function(i) DR[i[1],] - DR[i[2],]) -deM_logGER <- apply(combos,2,function(i) MTC[i[1],] - MTC[i[2],]) - -pVal_unfiltered <- pbsapply(colnames(combos),function(i) - apply(nge,1,function(X) - wilcox.test(X[cl[,res] == combos[1,i]], - X[cl[,res] == combos[2,i]])$p.value),simplify=T) -pVal_unfiltered[is.na(pVal_unfiltered)] <- 1 - - -#### Actual experiment #### -par(mar=c(3,3,3,1),mgp=2:0) - -plot(as.vector(deM_logGER),-log10(as.vector(pVal_unfiltered)),pch=".", - ylab="-log10(p-value)",xlab="Log2(Gene Expression Ratio)") -plot(as.vector(deM_dDR),-log10(as.vector(pVal_unfiltered)),pch=".", - ylab="-log10(p-value)",xlab=expression(Delta~"Detection Rate")) - - -TPR <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & as.vector(pVal_unfiltered) < 0.01) / sum(as.vector(pVal_unfiltered) < 0.01) -} - -FPR <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & !(as.vector(pVal_unfiltered) < 0.01)) / sum(!as.vector(pVal_unfiltered) < 0.01) -} - -Pr <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & as.vector(pVal_unfiltered) < 0.01) / sum(abs(as.vector(method)) > thresh) -} - -dDR <- data.frame(TPR=sapply(seq(0,1,.01),TPR,method=deM_dDR), - FPR=sapply(seq(0,1,.01),FPR,method=deM_dDR), - Pr=sapply(seq(0,1,.01),Pr,method=deM_dDR)) -rownames(dDR) <- seq(0,1,.01) -GER <- data.frame(TPR=sapply(seq(0,10,.05),TPR,method=deM_logGER), - FPR=sapply(seq(0,10,.05),FPR,method=deM_logGER), - Pr=sapply(seq(0,10,.05),Pr,method=deM_logGER)) -rownames(GER) <- seq(0,10,.05) - -bp <- brewer.pal(3,"Dark2") - -plot(TPR~FPR,data=dDR,type="l",col=bp[1],ylab="True Positive Rate",xlab="False Positive Rate",lwd=2) -lines(TPR~FPR,data=GER,type="l",col=bp[2],lwd=2) -legend("top",bty="n",lwd=2,col=bp[1:2],horiz=T,inset=c(0,-.12),xpd=NA, - legend=c(expression(Delta~"Detection Rate"),"Gene Expression Ratio"),title="Threshold type") - -pdf(file="../Reports/18_F1000Res_scClustViz/Fig2a.pdf",width=6,height=6) -par(mar=c(3,3,3,1),mgp=2:0) -plot(Pr~TPR,data=dDR,type="l",col=bp[1],ylab="Precision",xlab="Recall",lwd=2) -lines(Pr~TPR,data=GER,type="l",col=bp[2],lwd=2) -points(Pr~TPR,data=dDR[c("0.1","0.15","0.2"),],pch=19,col=bp[1]) -text(dDR[c("0.1","0.15","0.2"),"TPR"],dDR[c("0.1","0.15","0.2"),"Pr"],c("0.1","0.15","0.2"),col=bp[1],pos=4) -legend("top",bty="n",lwd=2,col=bp[1:2],horiz=T,inset=c(0,-.12),xpd=NA, - legend=c(expression(Delta~"Detection Rate"),"Gene Expression Ratio"),title="Threshold type") -dev.off() - - - - -######### 10X Chromium ######## - -#### Data setup from PrepareInputs.R #### -mean.logX <- function(data,ex=exponent,pc=pseudocount) { log(mean(ex^data - pc) + 1/ncol(nge),base=ex) } -exponent <- 2 -pseudocount <- 1 - -load("liver/HumanLiver_forViz.RData") -rm(CGS,deTissue,deMarker,deNeighb) - -res <- "RG0.res.0.8" -CGS <- list() - -DR <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) sum(Y>0)/length(Y))) -MDTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) -})) -MTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],mean.logX)) -CGS[[res]] <- sapply(levels(cl[,res]),function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - -combos <- combn(levels(cl[,res]),2) -colnames(combos) <- apply(combos,2,function(X) paste(X,collapse="-")) -deM_dDR <- apply(combos,2,function(i) DR[i[1],] - DR[i[2],]) -deM_logGER <- apply(combos,2,function(i) MTC[i[1],] - MTC[i[2],]) - -pVal_unfiltered <- pbsapply(colnames(combos),function(i) - apply(nge,1,function(X) - wilcox.test(X[cl[,res] == combos[1,i]], - X[cl[,res] == combos[2,i]])$p.value),simplify=T) -pVal_unfiltered[is.na(pVal_unfiltered)] <- 1 - - -#### Actual experiment #### -par(mar=c(3,3,3,1),mgp=2:0) - -plot(as.vector(deM_logGER),-log10(as.vector(pVal_unfiltered)),pch=".", - ylab="-log10(p-value)",xlab="Log2(Gene Expression Ratio)") -plot(as.vector(deM_dDR),-log10(as.vector(pVal_unfiltered)),pch=".", - ylab="-log10(p-value)",xlab=expression(Delta~"Detection Rate")) - - -TPR <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & as.vector(pVal_unfiltered) < 0.01) / sum(as.vector(pVal_unfiltered) < 0.01) -} - -FPR <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & !(as.vector(pVal_unfiltered) < 0.01)) / sum(!as.vector(pVal_unfiltered) < 0.01) -} - -Pr <- function(thresh,method) { - sum(abs(as.vector(method)) > thresh & as.vector(pVal_unfiltered) < 0.01) / sum(abs(as.vector(method)) > thresh) -} - -dDR2 <- data.frame(TPR=sapply(seq(0,1,.01),TPR,method=deM_dDR), - FPR=sapply(seq(0,1,.01),FPR,method=deM_dDR), - Pr=sapply(seq(0,1,.01),Pr,method=deM_dDR)) -rownames(dDR2) <- seq(0,1,.01) -GER2 <- data.frame(TPR=sapply(seq(0,10,.05),TPR,method=deM_logGER), - FPR=sapply(seq(0,10,.05),FPR,method=deM_logGER), - Pr=sapply(seq(0,10,.05),Pr,method=deM_logGER)) -rownames(GER2) <- seq(0,10,.05) - -bp <- brewer.pal(3,"Dark2") - -plot(TPR~FPR,data=dDR2,type="l",col=bp[1],ylab="True Positive Rate",xlab="False Positive Rate",lwd=2) -lines(TPR~FPR,data=GER2,type="l",col=bp[2],lwd=2) -legend("top",bty="n",lwd=2,col=bp[1:2],horiz=T,inset=c(0,-.12),xpd=NA, - legend=c(expression(Delta~"Detection Rate"),"Gene Expression Ratio"),title="Threshold type") - -pdf(file="../Reports/18_F1000Res_scClustViz/Fig2b.pdf",width=6,height=6) -par(mar=c(3,3,3,1),mgp=2:0) -plot(Pr~TPR,data=dDR2,type="l",col=bp[1],ylab="Precision",xlab="Recall",lwd=2) -lines(Pr~TPR,data=GER2,type="l",col=bp[2],lwd=2) -points(Pr~TPR,data=dDR2[c("0.1","0.15","0.2"),],pch=19,col=bp[1]) -text(dDR2[c("0.1","0.15","0.2"),"TPR"],dDR2[c("0.1","0.15","0.2"),"Pr"],c("0.1","0.15","0.2"),col=bp[1],pos=4) -legend("top",bty="n",lwd=2,col=bp[1:2],horiz=T,inset=c(0,-.12),xpd=NA, - legend=c(expression(Delta~"Detection Rate"),"Gene Expression Ratio"),title="Threshold type") -dev.off() - - - -######## Combined Figure ######## - -pdf(file="../Reports/18_F1000Res_scClustViz/Fig2.pdf",width=6,height=6) -par(mar=c(3,3,4,1),mgp=2:0) -plot(x=NULL,y=NULL,xlim=0:1,ylim=0:1,ylab="Precision",xlab="Recall") - -lines(Pr~TPR,data=dDR,type="l",col=bp[1],lwd=2,lty=2) -points(Pr~TPR,data=dDR[c("0.1","0.15","0.2"),],pch=19,col=bp[1]) -text(dDR[c("0.1","0.15","0.2"),"TPR"],dDR[c("0.1","0.15","0.2"),"Pr"],c("0.1","0.15","0.2"),col=bp[1],pos=4) -lines(Pr~TPR,data=GER,type="l",col=bp[1],lwd=2,lty=1) -points(Pr~TPR,data=GER[c("0.25","1","1.5"),],pch=19,col=bp[1]) -text(GER[c("0.25","1","1.5"),"TPR"],GER[c("0.25","1","1.5"),"Pr"],c("0.25","1","1.5"),col=bp[1],pos=1) - -lines(Pr~TPR,data=dDR2,type="l",col=bp[2],lwd=2,lty=2) -points(Pr~TPR,data=dDR2[c("0.1","0.15","0.2"),],pch=19,col=bp[2]) -text(dDR2[c("0.1","0.15","0.2"),"TPR"],dDR2[c("0.1","0.15","0.2"),"Pr"],c("0.1","0.15","0.2"),col=bp[2],pos=1) -lines(Pr~TPR,data=GER2,type="l",col=bp[2],lwd=2,lty=1) -points(Pr~TPR,data=GER2[c("0.25","1","1.5"),],pch=19,col=bp[2]) -text(GER2[c("0.25","1","1.5"),"TPR"],GER2[c("0.25","1","1.5"),"Pr"],c("0.25","1","1.5"),col=bp[2],pos=1) - -legend("top",bty="n",ncol=2,inset=c(0,-.16),xpd=NA, - lwd=2,lty=c(1,1,2,2),col=bp[c(1,2,1,2)],title="Threshold type", - legend=c("Gene Expression Ratio (DropSeq)","Gene Expression Ratio (10X)", - expression(Delta~"Detection Rate (DropSeq)"),expression(Delta~"Detection Rate (10X)"))) -dev.off() diff --git a/ToBeConvertedToPkg/pipeline/DropSeqDGEhelper.sh b/ToBeConvertedToPkg/pipeline/DropSeqDGEhelper.sh deleted file mode 100644 index 5645e46..0000000 --- a/ToBeConvertedToPkg/pipeline/DropSeqDGEhelper.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -gunzip -k SORTED_out_gene_exon_tagged_dge.txt.gz - -#rows -awk 'NR > 1 {print $1}' SORTED_out_gene_exon_tagged_dge.txt | awk 'END {print NR}' -awk 'NR > 1 {print $1}' SORTED_out_gene_exon_tagged_dge.txt > SORTED_out_gene_exon_tagged_dge_GENES.txt - -#columns -head -n1 SORTED_out_gene_exon_tagged_dge.txt | cut -f1 --complement | awk '{print NF}' -head -n1 SORTED_out_gene_exon_tagged_dge.txt | cut -f1 --complement > SORTED_out_gene_exon_tagged_dge_BARCODES.txt - -#matrix -awk 'NR > 1 {print $0}' SORTED_out_gene_exon_tagged_dge.txt | cut -f1 --complement | awk 'END {print NR, NF}' -awk 'NR > 1 {print $0}' SORTED_out_gene_exon_tagged_dge.txt | cut -f1 --complement > SORTED_out_gene_exon_tagged_dge_MATRIX.txt - -#cleanup -#rm SORTED_out_gene_exon_tagged_dge.txt - - - - -#### ADD NEW DENEIGHB METHOD TO PIPELINE #### -#### E11.5 and E17.5 swapped to new method #### -#### DropSeq loading code: - -if (!file.exists(paste0(dataPath,"ebRaw.RData"))) { - timePoints <- "e17" - temp_path <- paste0(sys,"Dropbox/GDB/meCortex/SharedCortexData/") - temp_preamble <- "e17/outE175_gene_exon_tagged_dge_" - temp_cells <- scan(paste0(temp_path,temp_preamble,"BARCODES.txt"),character(),sep="\t") - temp_genes <- scan(paste0(temp_path,temp_preamble,"GENES.txt"),character(),sep="\t") - tempData <- Matrix(scan(paste0(temp_path,temp_preamble,"MATRIX.txt"),integer(),sep="\t"), - nrow=length(temp_genes),byrow=T) - colnames(tempData) <- paste(timePoints,temp_cells,sep="_") - rownames(tempData) <- temp_genes - ebRaw <- data_load_processing(tempData) - save(ebRaw,timePoints,file=paste0(dataPath,"ebRaw.RData")) -} else { diff --git a/ToBeConvertedToPkg/pipeline/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz b/ToBeConvertedToPkg/pipeline/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz deleted file mode 100644 index da0cae4..0000000 Binary files a/ToBeConvertedToPkg/pipeline/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust.Rmd b/ToBeConvertedToPkg/pipeline/pipeline_Clust.Rmd deleted file mode 100644 index dfa7f80..0000000 --- a/ToBeConvertedToPkg/pipeline/pipeline_Clust.Rmd +++ /dev/null @@ -1,543 +0,0 @@ ---- -title: "Clust" -output: - md_document: - toc: yes - pdf_document: - toc: yes ---- -# Clustering using *Seurat* - -```{r Setup, echo=T} -### Restart your R session between running pipeline_QCN.Rmd and pipeline_Clust.Rmd, -### unless you've set the maximum number of DLLs to > 100. Seurat and scran cannot -### be loaded at the same time in R with the default DLL limit of 100. - -dataName <- "10Xneurons" -## Name your analysis results -dataPath <- "../scClustViz_files/demo_10Xneurons900/" -## Path to analysis output directory - -library(scales) -library(viridis) -library(RColorBrewer) -library(cluster) -library(pbapply) -library(Seurat) #see http://satijalab.org/seurat/install.html - -rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) -} - -load(paste0(dataPath,"clustInputs.RData")) -``` - -# Clustering by SNN-cliq - -Seurat implements an interpretation of SNN-Cliq (https://doi.org/10.1093/bioinformatics/btv088) for clustering of single-cell expression data. They use PCs to define the distance metric, then embed the cells in a graph where edges between cells (nodes) are weighted based on their similarity (euclidean distance in PCA space). These edge weights are refined based on Jaccard distance (overlap in local neighbourhoods), and then communities ("quasi-cliques") are identified in the graph using a smart local moving algorithm (SLM, http://dx.doi.org/10.1088/1742-5468/2008/10/P10008) to optimize the modularity measure of the defined communities in the graph. - -## PCA and Spectral tSNE - -```{r viz_fx, echo=T} -plotPCloadings <- function( - eb1S,highlight=NULL,components="PCA",nPC=20, - comparison=c("Timepoint","LibrarySize","GeneDetect","mitoPct","CCcyclone","CCseurat") -) { - if (nPC %% 5 != 0) { - warning("nPC argument should be multiple of 5") - } - if (components == "PCA") { - input <- eb1S@dr$pca@cell.embeddings - } else if (components == "ICA") { - input <- eb1S@dr$ica@cell.embeddings - } else { - warning("components argument should be in c(\"PCA\",\"ICA\")") - } - if (comparison == "Timepoint") { - compInput <- eb1S@meta.data$orig.ident - } else if (comparison == "LibrarySize") { - compInput <- log10(eb1S@meta.data$total_counts) - comparison <- expression(log[10]~"Library size") - } else if (comparison == "GeneDetect") { - compInput <- log10(eb1S@meta.data$total_features) - comparison <- expression(log[10]~"Genes Detected") - } else if (comparison == "mitoPct") { - compInput <- eb1S@meta.data$mitoPct - comparison <- "Proportion of mitochondrial transcripts detected" - } else if (comparison == "CCcyclone") { - compInput <- eb1S@meta.data$cycPhase - } else if (comparison == "CCseurat") { - compInput <- eb1S@meta.data$Phase - } else { - warning("comparison argument should be in -c(\"Timepoint\",\"LibrarySize\",\"mitoPct\",\"CCcyclone\",\"CCseurat\")") - } - .parDef <- par(no.readonly=T) - layout(rbind(c(0,rep(nPC+1,5)), - cbind(rep(nPC+3,nPC/5), - matrix(seq(1,nPC),ncol=5,byrow=T)), - c(0,rep(nPC+2,5))), - widths=c(0.2,rep(1,nPC/5)), - heights=c(0.2,rep(1,nPC/5),0.2)) - par(mar=c(1,1,1,1),mgp=c(1,0,0),tck=0.02) - for (i in 1:nPC) { - if (is.factor(compInput)) { - temp_density <- tapply(input[,i],compInput,density) - plot(x=NULL,y=NULL,main=paste0(components," #",i),ylab=NA,xlab=NA, - xlim=range(sapply(temp_density,function(X) range(X$x))), - ylim=range(sapply(temp_density,function(X) range(X$y)))) - rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col="grey70") - for (l in seq_along(temp_density)) { - lines(temp_density[[l]],lwd=3,col=viridis(length(temp_density),.6,1,0)[l]) - } - } else { - plot(x=compInput,y=input[,i],pch=16,col=alpha("black",0.3), - main=paste0(components," #",i," (r=",round(cor(compInput,input[,i]),2),")")) - } - if (i %in% highlight) { box(lwd=2,col="red") } - } - par(mar=c(0,0,0,0)) - if (is.factor(compInput)) { - plot.new() - legend(col=viridis(length(temp_density),1,1,0),lwd=3, - legend=names(temp_density),x="center",horiz=T,bty="n") - plot.new() - mtext("Principal component projection",1,line=-1.5) - plot.new() - mtext("Density",2,line=-1.5) - } else { - plot.new() - plot.new() - mtext(comparison,1,line=-1.5) - plot.new() - mtext("Principal component projection",2,line=-1.5) - } - par(.parDef) -} - -plotTSNEbatch <- function( - eb1S, - colour=c("Timepoint","LibrarySize","GeneDetect","mitoPct","CCcyclone","CCseurat") -) { - if (colour == "Timepoint") { - colourVector <- eb1S@meta.data$orig.ident - colourLength <- length(levels(eb1S@meta.data$orig.ident)) - rnd <- sample(1:nrow(eb1S@dr$tsne@cell.embeddings), - size=nrow(eb1S@dr$tsne@cell.embeddings)) - } else if (colour == "LibrarySize") { - inputVal <- "total_counts" - colourVector <- cut(log10(eb1S@meta.data$total_counts),breaks=100,labels=F) - colourLength <- 100 - colourScale <- "(log scale)" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "GeneDetect") { - inputVal <- "total_features" - colourVector <- cut(log10(eb1S@meta.data$total_features),breaks=100,labels=F) - colourLength <- 100 - colourScale <- "(log scale)" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "mitoPct") { - inputVal <- "mitoPct" - colourVector <- cut(eb1S@meta.data$mitoPct,breaks=100,labels=F) - colourLength <- 100 - colourScale <- "" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "CCcyclone") { - colourVector <- eb1S@meta.data$cycPhase - colourLength <- length(levels(eb1S@meta.data$cycPhase)) - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "CCseurat") { - colourVector <- eb1S@meta.data$Phase - colourLength <- length(levels(eb1S@meta.data$Phase)) - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else { - warning("colour argument should be in -c(\"Timepoint\",\"LibrarySize\",\"mitoPct\",\"CCcyclone\",\"CCseurat\")") - } - plot(eb1S@dr$tsne@cell.embeddings[rnd,],pch=21,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(colourLength,.7,d=-1)[colourVector[rnd]], - bg=viridis(colourLength,.3,d=-1)[colourVector[rnd]]) - if (is.factor(colourVector)) { - title(main=colour,adj=0.01) - if (length(levels(colourVector)) <= 3) { - legend("topright",xpd=NA,inset=c(0,-.08),bty="n",horiz=T, - legend=levels(colourVector),pch=16, - col=viridis(colourLength,.7,d=-1)[seq_along(levels(colourVector))]) - } else { - legend("topright",xpd=NA,inset=c(0,-.08),bty="n", - ncol=ceiling(length(levels(colourVector))/2), - legend=levels(colourVector),pch=16, - col=viridis(colourLength,.7,d=-1)[seq_along(levels(colourVector))]) - } - } else { - title(main=colour,adj=0.01) - segments(x0=seq(quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.5), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.9),length.out=1000), - y0=max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.1, - y1=max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.16, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.5), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.7), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.9)), - y=rep(max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.13,3), - labels=c(round(min(eb1S@meta.data[,inputVal]),2), - colourScale, - round(max(eb1S@meta.data[,inputVal]),2)), - pos=2:4,xpd=NA) - } -} -``` - - -```{r PCA, echo=T} -if (!file.exists(paste0(dataPath,"postPCA.RData"))) { - eb1S <- CreateSeuratObject(ebNorm,dataName,meta.data=as.data.frame(pDat), - min.cells=0,min.genes=0,save.raw=F) - data("cc.genes") - eb1S <- CellCycleScoring(eb1S,g2m.genes=cc.genes$g2m.genes,s.genes=cc.genes$s.genes) - ## If using mouse data, this will give a warning before doing case-insentive matching. - ## This is because the cell cycle gene names used are from human. - eb1S@meta.data$Phase <- factor(eb1S@meta.data$Phase,levels=c("G1","S","G2M")) - if (exists("hvg")) { - eb1S@var.genes <- rownames(hvg) - } else { - eb1S@var.genes <- rownames(ebNorm) - } - eb1S <- ScaleData(eb1S,display.progress=F,check.for.norm=F) - eb1S <- RunPCA(eb1S,pcs.compute=40,pc.genes=eb1S@var.genes,do.print=F) - save(eb1S,file=paste0(dataPath,"postPCA.RData")) -} else { - load(paste0(dataPath,"postPCA.RData")) - print(paste("Data loaded from",paste0(dataPath,"postPCA.RData"))) -} -``` - -```{r cleanup5, echo=T} -rm(list=ls()[!ls() %in% c("eb1S","timePoints","dataPath", - "plotPCloadings","plotTSNEbatch","rainbow2")]) -gc() -``` - -```{r plotPCs, echo=T,fig.height=6.3,fig.width=8.4,fig.show="hold"} -plotPCloadings(eb1S,comparison="LibrarySize",nPC=20) -plotPCloadings(eb1S,comparison="GeneDetect",nPC=20) -plotPCloadings(eb1S,comparison="mitoPct",nPC=20) -plotPCloadings(eb1S,comparison="CCcyclone",nPC=20) -plotPCloadings(eb1S,comparison="CCseurat",nPC=20) -``` - -It is important to assess the impact of technical factors on downstream clustering. Here we determine whether there are tecnical effects represented in the principal component projections, and the subsequent spectral tSNE projection. - -Technical confounders assessed: -- Library size -- Gene detection rate (strongly correlated with library size) -- Mitochondrial transcript detection proportion -- Cell cycle stage as predicted by *cyclone* -- Cell cycle stage as predicted by *Seurat* - -As you can see, there is a weak correlation between the first PC and mitochondrial transcript proportion, suggesting that cell damage may effect clustering results. You could re-run the PCA after using the `vars.to.regress` argument in the *Seurat* `ScaleData` command. Since PCA results are saved to disk in this pipeline, when re-running make sure to either rename or delete the original `postPCA.RData` file in your data path. - -```{r PCA_select, echo=T,fig.height=4.2,fig.width=4.2,fig.show="hold"} -### Selecting number of PCs to use. Run this code block once, then set maxPCt. -maxPCt <- 10 -## Number of PCs to use for downstream analysis, chosen from elbow of scree plot. -PCuse <- seq(1,maxPCt) - - -par(mar=c(3,3,1,1),mgp=2:0) -plot(seq_along(eb1S@dr$pca@sdev),eb1S@dr$pca@sdev, - xlab="Principal Component",ylab="Standard Deviation of PC") -Hmisc::minor.tick(nx=5,ny=1) - -points(maxPCt,eb1S@dr$pca@sdev[maxPCt],type="h",col="darkred") -arrows(x0=0.5,y0=eb1S@dr$pca@sdev[maxPCt]-.2, - x1=maxPCt-.1,y1=eb1S@dr$pca@sdev[maxPCt]-.2, - col="darkred",code=3,length=.1) -text(x=maxPCt/2,y=eb1S@dr$pca@sdev[maxPCt]-.2,pos=1, - labels="PCs used",col="darkred") -``` - -```{r tSNE, echo=T,fig.height=6.3,fig.width=6.3,fig.show="hold"} -eb1S <- RunTSNE(eb1S,dims.use=PCuse,perplexity=30) -## Default perplexity is 30, but should be set lower with small numbers of cells. -par(mar=c(3,3,2,1),mgp=2:0) -plotTSNEbatch(eb1S,colour="LibrarySize") -plotTSNEbatch(eb1S,colour="GeneDetect") -plotTSNEbatch(eb1S,colour="mitoPct") -plotTSNEbatch(eb1S,colour="CCcyclone") -plotTSNEbatch(eb1S,colour="CCseurat") -``` - - -```{r ClusterAndDE, echo=T,fig.height=5.6,fig.width=8.4} -if (!file.exists(paste0(dataPath,"eb1S.RData"))) { - ######## User-defined variables ######## - exponent <- 2 - ## ^ log base of your normalized input data. - ## Seurat defaults to natural log (set this to exp(1)), - ## other methods are generally log2 (set this to 2). - pseudocount <- 1 - ## ^ pseudocount added to all log-normalized values in your input data. - ## Most methods use a pseudocount of 1 to eliminate log(0) errors. - - #threshType <- "logGER" # use an expression ratio-based threshold for filtering genes prior to DE testing - threshType <- "dDR" # use a difference in detection rate threshold for filtering - ## Filtering genes for use in differential expression testing can be done multiple ways. - ## We use an expression ratio filter for comparing each cluster to the rest of the tissue as a whole, - ## but find that difference in detection rates works better when comparing clusters to each other. - ## You can set threshType to "logGER" to use fold-change for all gene filtering if you'd prefer. - - logGERthresh <- 1 # magnitude of mean log-expression fold change between clusters to use as filter. - dDRthresh <- 0.15 # magnitude of detection rate difference between clusters to use as filter. - WRSTalpha <- 0.01 # significance level for DE testing using Wilcoxon rank sum test - - ######## Functions ######## - mean.logX <- function(data,ex=exponent,pc=pseudocount) { - log(mean(ex^data - pc) + 1/ncol(eb1S@data),base=ex) - } - ## ^ Adding a pseudocount of 1 to the logMean prior to logFC calculations skews the - ## result quite dramatically, so instead we add a small pseudocount to avoid +/- inf - ## results when means are zero, without the same skewing. Adding a very small number - ## means that means of zero get set to a large negative log-mean, when it might be - ## more appropriate to have those values fall closer to the smallest non-zero log-mean. - ## By using a pseudocount of 1 / number of samples, we ensure that log(zero) is smaller - ## than any non-zero log-mean, while still being in the same ballpark. - - ######## Iteratively cluster and build DE sets ######## - CGS <- deTissue <- deVS <- deMarker <- deDist <- deNeighb <- list() - resVal <- 0; minNeighbDE <- 100 - while (minNeighbDE > 0) { - if (minNeighbDE <= 30) { - resVal <- resVal + 0.1 - } else { - resVal <- resVal + 0.2 - } - print("") - print("") - print(paste0("~~~~~~~~~~~~ Clustering at res.",resVal," ~~~~~~~~~~~~")) - if (!any(grepl("^res",colnames(eb1S@meta.data)))) { - eb1S <- FindClusters(eb1S,reduction.type="pca",dims.use=PCuse,k.param=30, - print.output=F,resolution=resVal, - algorithm=3,n.start=100,n.iter=100,save.SNN=T) - ## 30 is the default K, but as with tSNE perplexity, a smaller value might be - ## more appropriate with small cell numbers. - print(paste(length(levels(eb1S@ident)),"clusters identified")) - } else { - eb1S <- FindClusters(eb1S,print.output=F,resolution=resVal, - algorithm=3,n.start=100,n.iter=100,reuse.SNN=T) - print(paste(length(levels(eb1S@ident)),"clusters identified")) - if (all(eb1S@meta.data[,ncol(eb1S@meta.data)-1] == - eb1S@meta.data[,ncol(eb1S@meta.data)])) { - eb1S@meta.data <- eb1S@meta.data[,-ncol(eb1S@meta.data)] - next - } - } - res <- colnames(eb1S@meta.data)[length(colnames(eb1S@meta.data))] - - #### Precalculate stats for viz tool #### - print("") - print("") - print(paste("Calculating cluster gene summary statistics for",res)) - print("-- Gene detection rate per cluster --") - DR <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) sum(Y>0)/length(Y))) - print("-- Mean detected gene expression per cluster --") - MDTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - print("-- Mean gene expression per cluster --") - MTC <- pbapply(nge,1,function(X) tapply(X,cl[,res],mean.logX)) - CGS[[res]] <- sapply(levels(cl[,res]),function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - - #### deTissue - DE per cluster vs all other data #### - print("") - print(paste("Calculating DE vs tissue for",res,"with",length(levels(cl[,res])),"clusters")) - print("-- logGER calculations --") - deT_logGER <- pbsapply(levels(cl[,res]),function(i) - MTC[i,] - apply(nge[,cl[,res] != i],1,mean.logX)) - deT_genesUsed <- apply(deT_logGER,2,function(X) which(X > logGERthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logGERthresh should be set to less than ", - min(apply(deT_logGER,2,function(X) max(abs(X)))), - ", the largest magnitude logGER between cluster ", - names(which.min(apply(deT_logGER,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - print("-- Wilcoxon rank sum calculations --") - deT_pVal <- pbsapply(levels(cl[,res]),function(i) - apply(nge[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[cl[,res] == i],X[cl[,res] != i])$p.value),simplify=F) - deTissue[[res]] <- sapply(levels(cl[,res]),function(i) - data.frame(logGER=deT_logGER[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,deTissue[[res]])$pVal,"fdr"), - rep(names(sapply(deTissue[[res]],nrow)),sapply(deTissue[[res]],nrow)),c) - for (i in names(deTissue[[res]])) { - deTissue[[res]][[i]] <- deTissue[[res]][[i]][tempQval[[i]] <= WRSTalpha,] - deTissue[[res]][[i]]$qVal <- tempQval[[i]][tempQval[[i]] <= WRSTalpha] - } - - #### deMarker - DE per cluster vs each other cluster #### - combos <- combn(levels(cl[,res]),2) - colnames(combos) <- apply(combos,2,function(X) paste(X,collapse="-")) - print("") - print(paste("Calculating marker DE for",res,"with",ncol(combos),"combinations of clusters")) - deM_dDR <- apply(combos,2,function(i) DR[i[1],] - DR[i[2],]) - deM_logGER <- apply(combos,2,function(i) MTC[i[1],] - MTC[i[2],]) - deM_genesUsed <- switch(threshType, - dDR=apply(deM_dDR,2,function(X) which(abs(X) > dDRthresh)), - logGER=apply(deM_logGER,2,function(X) which(abs(X) > logGERthresh))) - if (any(sapply(deM_genesUsed,length) < 1)) { - stop("Gene filtering threshold is set too high.") - } - deM_pVal <- pbsapply(colnames(combos),function(i) - apply(nge[deM_genesUsed[[i]],],1,function(X) - wilcox.test(X[cl[,res] == combos[1,i]], - X[cl[,res] == combos[2,i]])$p.value),simplify=F) - temp_deVS <- sapply(colnames(combos),function(i) - data.frame(dDR=deM_dDR[deM_genesUsed[[i]],i],logGER=deM_logGER[deM_genesUsed[[i]],i], - pVal=deM_pVal[[i]])[order(deM_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,temp_deVS)$pVal,"fdr"), - rep(names(sapply(temp_deVS,nrow)),sapply(temp_deVS,nrow)),c) - for (i in names(temp_deVS)) { temp_deVS[[i]]$qVal <- tempQval[[i]] } - - deVS[[res]] <- sapply(levels(cl[,res]),function(i) { - combos <- strsplit(names(temp_deVS),"-") - temp <- list() - for (X in seq_along(combos)) { - if (! i %in% combos[[X]]) { - next - } else if (which(combos[[X]] == i) == 1) { - temp[[combos[[X]][2]]] <- temp_deVS[[X]][temp_deVS[[X]][,threshType] > 0 & - temp_deVS[[X]]$qVal <= WRSTalpha,] - } else if (which(combos[[X]] == i) == 2) { - temp[[combos[[X]][1]]] <- temp_deVS[[X]][temp_deVS[[X]][,threshType] < 0 & - temp_deVS[[X]]$qVal <= WRSTalpha,] - temp[[combos[[X]][1]]]$dDR <- temp[[combos[[X]][1]]]$dDR * -1 - temp[[combos[[X]][1]]]$logGER <- temp[[combos[[X]][1]]]$logGER * -1 - } - } - return(temp) - },simplify=F) - - deMarker[[res]] <- sapply(deVS[[res]],function(X) { - markerGenes <- Reduce(intersect,lapply(X,rownames)) - temp <- sapply(X,function(Y) Y[markerGenes,c("dDR","logGER","qVal")],simplify=F) - names(temp) <- paste("vs",names(temp),sep=".") - return(do.call(cbind,temp)) - },simplify=F) - - ### deNeighb - DE between nearest neighbouring clusters #### - deDist[[res]] <- sapply(names(deVS[[res]]),function(X) sapply(names(deVS[[res]]),function(Y) - if (X == Y) { return(NA) } else { min(nrow(deVS[[res]][[X]][[Y]]),nrow(deVS[[res]][[Y]][[X]])) })) - nb <- colnames(deDist[[res]])[apply(deDist[[res]],1,which.min)] - names(nb) <- colnames(deDist[[res]]) - ## Nearest neighbour determined by number of DE genes between clusters. - - deNeighb[[res]] <- mapply(function(NB,VS) VS[[NB]][,c("dDR","logGER","qVal")], - NB=nb,VS=deVS[[res]],SIMPLIFY=F) - for (i in names(deNeighb[[res]])) { - colnames(deNeighb[[res]][[i]]) <- paste("vs",nb[i],colnames(deNeighb[[res]][[i]]),sep=".") - } - - minNeighbDE <- min(sapply(deNeighb[[res]],nrow)) - } - save(eb1S,CGS,deTissue,deMarker,deDist,deNeighb,deVS,deNeighb,file=paste0(dataPath,"eb1S.RData")) -} else { - #### Static visualization of clustering results #### - ## This is done seperately from the processing, because occassionally RStudio crashes - ## while making figures, and having that interrupt the slow clustering loop sucks. - load(paste0(dataPath,"eb1S.RData")) - print(paste("Data loaded from",paste0(dataPath,"eb1S.RData"))) - - - temp_max <- max(unlist(sapply(deNeighb,function(X) sapply(X,nrow)))) - for (res in grep("^res",colnames(eb1S@meta.data),value=T)) { - temp_cl <- as.factor(eb1S@meta.data[,res]) - if (length(levels(temp_cl)) <= 8) { - clustCols <- brewer.pal(length(levels(temp_cl)),"Dark2")[1:length(levels(temp_cl))] - } else { - clustCols <- rainbow2(length(levels(temp_cl))) - } - - layout(matrix(c(1,4,2,3),2),heights=c(3,1)) - par(mar=c(4,1,3,3),mgp=2:0) - tempSil <- silhouette(as.integer(temp_cl), - dist(eb1S@dr$pca@cell.embeddings[,seq(1,maxPCt)],method="euclidean")) - plot(tempSil,main=res,col=clustCols,border=NA) - - par(mar=c(3,3,1,1),mgp=2:0) - plot(eb1S@dr$tsne@cell.embeddings,pch=21,xlab="tSNE_1",ylab="tSNE_2", - col=alpha(clustCols[temp_cl],.7), - bg=alpha(clustCols[temp_cl],.3)) - text(apply(eb1S@dr$tsne@cell.embeddings,2,function(X) tapply(X,temp_cl,mean)), - labels=levels(temp_cl),font=2) - - par(mar=c(3.5,0.2,1,1)) - plot(x=NA,y=NA,xlim=c(30,temp_max+20),ylim=c(0.5,2.5), - xaxs="i",yaxt="n",frame.plot=F,ylab="",xlab="",lab=c(10,5,7)) - boxplot(cbind(sapply(deMarker[[res]],nrow), - sapply(deNeighb[[res]],nrow)), - add=T,horizontal=T,frame.plot=F,yaxt="n",xaxt="n") - abline(v=30,lty=3) - - par(mar=c(3.5,6,1,0.2)) - plot(x=NA,y=NA,xlim=c(-1,30),ylim=c(0.5,2.5), - xaxs="i",yaxt="n",frame.plot=F,ylab="",xlab="") - boxplot(cbind(sapply(deMarker[[res]],nrow), - sapply(deNeighb[[res]],nrow)), - add=T,horizontal=T,frame.plot=F,yaxt="n",xaxt="n") - abline(v=30,lty=3) - par(las=1,mgp=c(0,0,0)) - axis(2,at=1:2,labels=c("vs all clusters","vs neighbour"),lty=0) - mtext("Number of differentially expressed genes per cluster", - side=1,line=2.5,at=30,xpd=NA) - } -} -``` - - -```{r SendTo_scClustViz, echo=T} -if (!file.exists(paste0(dataPath,eb1S@project.name,"_forViz.RData"))) { - #### Convert to scClustViz inputs #### - nge <- eb1S@data - ## ^ normalized gene expression matrix (matrix: genes x cells) - md <- eb1S@meta.data[,!grepl("^res",colnames(eb1S@meta.data))] - ## ^ metadata for cells (dataframe of cells) - if (is.data.frame(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))])) { - cl <- data.frame(lapply(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))], - as.factor)) - } else { - cl <- data.frame(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))]) - colnames(cl) <- grep("^res",colnames(eb1S@meta.data),value=T) - } - rownames(cl) <- rownames(md) - ## ^ cluster assignments per clustering resolution - ## (dataframe: cells x cluster labels as factors) - dr_clust <- eb1S@dr$pca@cell.embeddings[,eb1S@calc.params$RunTSNE$dims.use] - ## ^ cell embeddings in low-dimensional space used for clustering distances - ## (matrix: cells x dimensions) - ## Only including those dimensions used in downstream analysis - ## (ie. those passed to RunTSNE and FindClusters) - ## if that information is present (in calc.params). - ## Else, using all lower dimensions available. - dr_viz <- eb1S@dr$tsne@cell.embeddings - ## ^ cell embeddings in 2D space for visualization - ## (usually tSNE) (matrix: cells x coordinates) - - #### Save outputs for visualization #### - save(nge,md,cl,dr_clust,dr_viz, - CGS,deTissue,deMarker,deDist,deNeighb, - file=paste0(dataPath,eb1S@project.name,"_forViz.RData")) - ## ^ Saved objects for use in visualization script (RunVizScript.R). - - save(deVS,file=paste0(dataPath,eb1S@project.name,"_deVS.RData")) - ## ^ All pairwise DE test results between clusters. -} else { - print(paste("Did not overwrite",paste0(dataPath,"_forViz.RData"))) -} -``` diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust.md b/ToBeConvertedToPkg/pipeline/pipeline_Clust.md deleted file mode 100644 index 70ae5ee..0000000 --- a/ToBeConvertedToPkg/pipeline/pipeline_Clust.md +++ /dev/null @@ -1,579 +0,0 @@ -### [Home](/scClustViz) -- [Clustering using *Seurat*](#clustering-using-seurat) -- [Clustering by SNN-cliq](#clustering-by-snn-cliq) - - [PCA and Spectral tSNE](#pca-and-spectral-tsne) - -Clustering using *Seurat* -========================= - - ### Restart your R session between running pipeline_QCN.Rmd and pipeline_Clust.Rmd, - ### unless you've set the maximum number of DLLs to > 100. Seurat and scran cannot - ### be loaded at the same time in R with the default DLL limit of 100. - - dataName <- "10Xneurons" - ## Name your analysis results - dataPath <- "../scClustViz_files/demo_10Xneurons900/" - ## Path to analysis output directory - - library(scales) - library(viridis) - - ## Loading required package: viridisLite - - ## - ## Attaching package: 'viridis' - - ## The following object is masked from 'package:scales': - ## - ## viridis_pal - - library(RColorBrewer) - library(cluster) - library(pbapply) - library(Seurat) #see http://satijalab.org/seurat/install.html - - ## Loading required package: ggplot2 - - ## Loading required package: cowplot - - ## - ## Attaching package: 'cowplot' - - ## The following object is masked from 'package:ggplot2': - ## - ## ggsave - - ## Loading required package: Matrix - - rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) - } - - load(paste0(dataPath,"clustInputs.RData")) - -Clustering by SNN-cliq -====================== - -Seurat implements an interpretation of SNN-Cliq -() for clustering of -single-cell expression data. They use PCs to define the distance metric, -then embed the cells in a graph where edges between cells (nodes) are -weighted based on their similarity (euclidean distance in PCA space). -These edge weights are refined based on Jaccard distance (overlap in -local neighbourhoods), and then communities ("quasi-cliques") are -identified in the graph using a smart local moving algorithm (SLM, -) to optimize the -modularity measure of the defined communities in the graph. - -PCA and Spectral tSNE ---------------------- - - plotPCloadings <- function( - eb1S,highlight=NULL,components="PCA",nPC=20, - comparison=c("Timepoint","LibrarySize","GeneDetect","mitoPct","CCcyclone","CCseurat") - ) { - if (nPC %% 5 != 0) { - warning("nPC argument should be multiple of 5") - } - if (components == "PCA") { - input <- eb1S@dr$pca@cell.embeddings - } else if (components == "ICA") { - input <- eb1S@dr$ica@cell.embeddings - } else { - warning("components argument should be in c(\"PCA\",\"ICA\")") - } - if (comparison == "Timepoint") { - compInput <- eb1S@meta.data$orig.ident - } else if (comparison == "LibrarySize") { - compInput <- log10(eb1S@meta.data$total_counts) - comparison <- expression(log[10]~"Library size") - } else if (comparison == "GeneDetect") { - compInput <- log10(eb1S@meta.data$total_features) - comparison <- expression(log[10]~"Genes Detected") - } else if (comparison == "mitoPct") { - compInput <- eb1S@meta.data$mitoPct - comparison <- "Proportion of mitochondrial transcripts detected" - } else if (comparison == "CCcyclone") { - compInput <- eb1S@meta.data$cycPhase - } else if (comparison == "CCseurat") { - compInput <- eb1S@meta.data$Phase - } else { - warning("comparison argument should be in - c(\"Timepoint\",\"LibrarySize\",\"mitoPct\",\"CCcyclone\",\"CCseurat\")") - } - .parDef <- par(no.readonly=T) - layout(rbind(c(0,rep(nPC+1,5)), - cbind(rep(nPC+3,nPC/5), - matrix(seq(1,nPC),ncol=5,byrow=T)), - c(0,rep(nPC+2,5))), - widths=c(0.2,rep(1,nPC/5)), - heights=c(0.2,rep(1,nPC/5),0.2)) - par(mar=c(1,1,1,1),mgp=c(1,0,0),tck=0.02) - for (i in 1:nPC) { - if (is.factor(compInput)) { - temp_density <- tapply(input[,i],compInput,density) - plot(x=NULL,y=NULL,main=paste0(components," #",i),ylab=NA,xlab=NA, - xlim=range(sapply(temp_density,function(X) range(X$x))), - ylim=range(sapply(temp_density,function(X) range(X$y)))) - rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col="grey70") - for (l in seq_along(temp_density)) { - lines(temp_density[[l]],lwd=3,col=viridis(length(temp_density),.6,1,0)[l]) - } - } else { - plot(x=compInput,y=input[,i],pch=16,col=alpha("black",0.3), - main=paste0(components," #",i," (r=",round(cor(compInput,input[,i]),2),")")) - } - if (i %in% highlight) { box(lwd=2,col="red") } - } - par(mar=c(0,0,0,0)) - if (is.factor(compInput)) { - plot.new() - legend(col=viridis(length(temp_density),1,1,0),lwd=3, - legend=names(temp_density),x="center",horiz=T,bty="n") - plot.new() - mtext("Principal component projection",1,line=-1.5) - plot.new() - mtext("Density",2,line=-1.5) - } else { - plot.new() - plot.new() - mtext(comparison,1,line=-1.5) - plot.new() - mtext("Principal component projection",2,line=-1.5) - } - par(.parDef) - } - - plotTSNEbatch <- function( - eb1S, - colour=c("Timepoint","LibrarySize","GeneDetect","mitoPct","CCcyclone","CCseurat") - ) { - if (colour == "Timepoint") { - colourVector <- eb1S@meta.data$orig.ident - colourLength <- length(levels(eb1S@meta.data$orig.ident)) - rnd <- sample(1:nrow(eb1S@dr$tsne@cell.embeddings), - size=nrow(eb1S@dr$tsne@cell.embeddings)) - } else if (colour == "LibrarySize") { - inputVal <- "total_counts" - colourVector <- cut(log10(eb1S@meta.data$total_counts),breaks=100,labels=F) - colourLength <- 100 - colourScale <- "(log scale)" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "GeneDetect") { - inputVal <- "total_features" - colourVector <- cut(log10(eb1S@meta.data$total_features),breaks=100,labels=F) - colourLength <- 100 - colourScale <- "(log scale)" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "mitoPct") { - inputVal <- "mitoPct" - colourVector <- cut(eb1S@meta.data$mitoPct,breaks=100,labels=F) - colourLength <- 100 - colourScale <- "" - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "CCcyclone") { - colourVector <- eb1S@meta.data$cycPhase - colourLength <- length(levels(eb1S@meta.data$cycPhase)) - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else if (colour == "CCseurat") { - colourVector <- eb1S@meta.data$Phase - colourLength <- length(levels(eb1S@meta.data$Phase)) - rnd <- 1:nrow(eb1S@dr$tsne@cell.embeddings) - } else { - warning("colour argument should be in - c(\"Timepoint\",\"LibrarySize\",\"mitoPct\",\"CCcyclone\",\"CCseurat\")") - } - plot(eb1S@dr$tsne@cell.embeddings[rnd,],pch=21,xlab="tSNE_1",ylab="tSNE_2", - col=viridis(colourLength,.7,d=-1)[colourVector[rnd]], - bg=viridis(colourLength,.3,d=-1)[colourVector[rnd]]) - if (is.factor(colourVector)) { - title(main=colour,adj=0.01) - if (length(levels(colourVector)) <= 3) { - legend("topright",xpd=NA,inset=c(0,-.08),bty="n",horiz=T, - legend=levels(colourVector),pch=16, - col=viridis(colourLength,.7,d=-1)[seq_along(levels(colourVector))]) - } else { - legend("topright",xpd=NA,inset=c(0,-.08),bty="n", - ncol=ceiling(length(levels(colourVector))/2), - legend=levels(colourVector),pch=16, - col=viridis(colourLength,.7,d=-1)[seq_along(levels(colourVector))]) - } - } else { - title(main=colour,adj=0.01) - segments(x0=seq(quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.5), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.9),length.out=1000), - y0=max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.1, - y1=max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.16, - col=viridis(1000,d=-1),xpd=NA) - text(x=c(quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.5), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.7), - quantile(range(eb1S@dr$tsne@cell.embeddings[,1]),.9)), - y=rep(max(eb1S@dr$tsne@cell.embeddings[,2]) * 1.13,3), - labels=c(round(min(eb1S@meta.data[,inputVal]),2), - colourScale, - round(max(eb1S@meta.data[,inputVal]),2)), - pos=2:4,xpd=NA) - } - } - - if (!file.exists(paste0(dataPath,"postPCA.RData"))) { - eb1S <- CreateSeuratObject(ebNorm,dataName,meta.data=as.data.frame(pDat), - min.cells=0,min.genes=0,save.raw=F) - data("cc.genes") - eb1S <- CellCycleScoring(eb1S,g2m.genes=cc.genes$g2m.genes,s.genes=cc.genes$s.genes) - ## If using mouse data, this will give a warning before doing case-insentive matching. - ## This is because the cell cycle gene names used are from human. - eb1S@meta.data$Phase <- factor(eb1S@meta.data$Phase,levels=c("G1","S","G2M")) - if (exists("hvg")) { - eb1S@var.genes <- rownames(hvg) - } else { - eb1S@var.genes <- rownames(ebNorm) - } - eb1S <- ScaleData(eb1S,display.progress=F,check.for.norm=F) - eb1S <- RunPCA(eb1S,pcs.compute=40,pc.genes=eb1S@var.genes,do.print=F) - save(eb1S,file=paste0(dataPath,"postPCA.RData")) - } else { - load(paste0(dataPath,"postPCA.RData")) - print(paste("Data loaded from",paste0(dataPath,"postPCA.RData"))) - } - - ## [1] "Data loaded from ../scClustViz_files/demo_10Xneurons900/postPCA.RData" - - rm(list=ls()[!ls() %in% c("eb1S","timePoints","dataPath", - "plotPCloadings","plotTSNEbatch","rainbow2")]) - gc() - - ## used (Mb) gc trigger (Mb) max used (Mb) - ## Ncells 3033107 162 4716652 251.9 4716652 251.9 - ## Vcells 25811305 197 45641246 348.3 30107849 229.8 - - plotPCloadings(eb1S,comparison="LibrarySize",nPC=20) - plotPCloadings(eb1S,comparison="GeneDetect",nPC=20) - plotPCloadings(eb1S,comparison="mitoPct",nPC=20) - plotPCloadings(eb1S,comparison="CCcyclone",nPC=20) - plotPCloadings(eb1S,comparison="CCseurat",nPC=20) - -![](pipeline_Clust_files/figure-markdown_strict/plotPCs-1.png)![](pipeline_Clust_files/figure-markdown_strict/plotPCs-2.png)![](pipeline_Clust_files/figure-markdown_strict/plotPCs-3.png)![](pipeline_Clust_files/figure-markdown_strict/plotPCs-4.png)![](pipeline_Clust_files/figure-markdown_strict/plotPCs-5.png) - -It is important to assess the impact of technical factors on downstream -clustering. Here we determine whether there are tecnical effects -represented in the principal component projections, and the subsequent -spectral tSNE projection. - -Technical confounders assessed: -- Library size -- Gene detection rate (strongly correlated with library size) -- Mitochondrial transcript detection proportion -- Cell cycle stage as predicted by *cyclone* - Cell cycle stage as -predicted by *Seurat* - -As you can see, there is a weak correlation between the first PC and -mitochondrial transcript proportion, suggesting that cell damage may -effect clustering results. You could re-run the PCA after using the -`vars.to.regress` argument in the *Seurat* `ScaleData` command. Since -PCA results are saved to disk in this pipeline, when re-running make -sure to either rename or delete the original `postPCA.RData` file in -your data path. - - ### Selecting number of PCs to use. Run this code block once, then set maxPCt. - maxPCt <- 10 - ## Number of PCs to use for downstream analysis, chosen from elbow of scree plot. - PCuse <- seq(1,maxPCt) - - - par(mar=c(3,3,1,1),mgp=2:0) - plot(seq_along(eb1S@dr$pca@sdev),eb1S@dr$pca@sdev, - xlab="Principal Component",ylab="Standard Deviation of PC") - Hmisc::minor.tick(nx=5,ny=1) - - points(maxPCt,eb1S@dr$pca@sdev[maxPCt],type="h",col="darkred") - arrows(x0=0.5,y0=eb1S@dr$pca@sdev[maxPCt]-.2, - x1=maxPCt-.1,y1=eb1S@dr$pca@sdev[maxPCt]-.2, - col="darkred",code=3,length=.1) - text(x=maxPCt/2,y=eb1S@dr$pca@sdev[maxPCt]-.2,pos=1, - labels="PCs used",col="darkred") - -![](pipeline_Clust_files/figure-markdown_strict/PCA_select-1.png) - - eb1S <- RunTSNE(eb1S,dims.use=PCuse,perplexity=30) - ## Default perplexity is 30, but should be set lower with small numbers of cells. - par(mar=c(3,3,2,1),mgp=2:0) - plotTSNEbatch(eb1S,colour="LibrarySize") - plotTSNEbatch(eb1S,colour="GeneDetect") - plotTSNEbatch(eb1S,colour="mitoPct") - plotTSNEbatch(eb1S,colour="CCcyclone") - plotTSNEbatch(eb1S,colour="CCseurat") - -![](pipeline_Clust_files/figure-markdown_strict/tSNE-1.png)![](pipeline_Clust_files/figure-markdown_strict/tSNE-2.png)![](pipeline_Clust_files/figure-markdown_strict/tSNE-3.png)![](pipeline_Clust_files/figure-markdown_strict/tSNE-4.png)![](pipeline_Clust_files/figure-markdown_strict/tSNE-5.png) - - if (!file.exists(paste0(dataPath,"eb1S.RData"))) { - ######## User-defined variables ######## - exponent <- 2 - ## Log base of your normalized input data. - ## Seurat defaults to natural log (set this to exp(1)), - ## other methods are generally log2 (set this to 2). - pseudocount <- 1 - ## Pseudocount added to all log-normalized values in your input data. - ## Most methods use a pseudocount of 1 to eliminate log(0) errors. - logFCthresh <- 1 - ## Magnitude of mean log-expression fold change to use as a minimum threshold - ## for DE testing - WRSTalpha <- 0.01 - ## significance level for DE testing using Wilcoxon rank sum test - - ######## Functions ######## - mean.logX <- function(data,ex=exponent,pc=pseudocount) { - log(mean(ex^data - pc) + 1/ncol(eb1S@data),base=ex) - } - ## ^ Adding a pseudocount of 1 to the logMean prior to logFC calculations skews the - ## result quite dramatically, so instead we add a small pseudocount to avoid +/- inf - ## results when means are zero, without the same skewing. Adding a very small number - ## means that means of zero get set to a large negative log-mean, when it might be - ## more appropriate to have those values fall closer to the smallest non-zero log-mean. - ## By using a pseudocount of 1 / number of samples, we ensure that log(zero) is smaller - ## than any non-zero log-mean, while still being in the same ballpark. - - ######## Iteratively cluster and build DE sets ######## - CGS <- deTissue <- deVS <- deMarker <- deNeighb <- list() - resVal <- 0; minNeighbDE <- 100 - while (minNeighbDE > 0) { - if (minNeighbDE <= 30) { - resVal <- resVal + 0.1 - } else { - resVal <- resVal + 0.2 - } - print("") - print("") - print(paste0("~~~~~~~~~~~~ Clustering at res.",resVal," ~~~~~~~~~~~~")) - if (!any(grepl("^res",colnames(eb1S@meta.data)))) { - eb1S <- FindClusters(eb1S,reduction.type="pca",dims.use=PCuse,k.param=30, - print.output=F,resolution=resVal, - algorithm=3,n.start=100,n.iter=100,save.SNN=T) - ## 30 is the default K, but as with tSNE perplexity, a smaller value might be - ## more appropriate with small cell numbers. - print(paste(length(levels(eb1S@ident)),"clusters identified")) - } else { - eb1S <- FindClusters(eb1S,print.output=F,resolution=resVal, - algorithm=3,n.start=100,n.iter=100,reuse.SNN=T) - print(paste(length(levels(eb1S@ident)),"clusters identified")) - if (all(eb1S@meta.data[,ncol(eb1S@meta.data)-1] == - eb1S@meta.data[,ncol(eb1S@meta.data)])) { - eb1S@meta.data <- eb1S@meta.data[,-ncol(eb1S@meta.data)] - next - } - } - res <- colnames(eb1S@meta.data)[length(colnames(eb1S@meta.data))] - - #### Precalculate stats for viz tool #### - print("") - print("") - print(paste("Calculating cluster gene summary statistics for",res)) - print("-- Gene detection rate per cluster --") - DR <- pbapply(eb1S@data,1,function(X) tapply(X,eb1S@ident, - function(Y) sum(Y>0)/length(Y))) - print("-- Mean detected gene expression per cluster --") - MDTC <- pbapply(eb1S@data,1,function(X) tapply(X,eb1S@ident,function(Y) { - temp <- mean.logX(Y[Y>0]) - if (is.na(temp)) { temp <- 0 } - return(temp) - })) - print("-- Mean gene expression per cluster --") - MTC <- pbapply(eb1S@data,1,function(X) tapply(X,eb1S@ident,mean.logX)) - if (length(levels(eb1S@ident)) <= 1) { - CGS[[res]] <- list(data.frame(DR=DR,MDTC=MDTC,MTC=MTC)) - names(CGS[[res]]) <- levels(eb1S@ident) - next - } else { - CGS[[res]] <- sapply(levels(eb1S@ident),function(X) - data.frame(DR=DR[X,],MDTC=MDTC[X,],MTC=MTC[X,]),simplify=F) - } - - #### deTissue - DE per cluster vs all other data #### - print("") - print(paste("Calculating DE vs tissue for",res,"with", - length(levels(eb1S@ident)),"clusters")) - print("-- LogFC calculations --") - deT_logFC <- pbsapply(levels(eb1S@ident),function(i) - MTC[i,] - apply(eb1S@data[,eb1S@ident != i],1,mean.logX)) - deT_genesUsed <- apply(deT_logFC,2,function(X) which(X > logFCthresh)) - if (any(sapply(deT_genesUsed,length) < 1)) { - stop(paste0("logFCthresh should be set to less than ", - min(apply(deT_logFC,2,function(X) max(abs(X)))), - ", the largest magnitude logFC between cluster ", - names(which.min(apply(deT_logFC,2,function(X) max(abs(X))))), - " and the remaining data.")) - } - print("-- Wilcoxon rank sum calculations --") - deT_pVal <- pbsapply(levels(eb1S@ident),function(i) - apply(eb1S@data[deT_genesUsed[[i]],],1,function(X) - wilcox.test(X[eb1S@ident == i],X[eb1S@ident != i])$p.value),simplify=F) - deTissue[[res]] <- sapply(levels(eb1S@ident),function(i) - data.frame(logFC=deT_logFC[deT_genesUsed[[i]],i], - pVal=deT_pVal[[i]])[order(deT_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,deTissue[[res]])$pVal,"fdr"), - rep(names(sapply(deTissue[[res]],nrow)), - sapply(deTissue[[res]],nrow)),c) - for (i in names(deTissue[[res]])) { deTissue[[res]][[i]]$qVal <- tempQval[[i]] } - - #### deMarker - DE per cluster vs each other cluster #### - combos <- combn(levels(eb1S@ident),2) - colnames(combos) <- apply(combos,2,function(X) paste(X,collapse="-")) - print("") - print(paste("Calculating marker DE for",res,"with", - ncol(combos),"combinations of clusters")) - deM_dDR <- apply(combos,2,function(i) DR[i[1],] - DR[i[2],]) - deM_logFC <- apply(combos,2,function(i) MTC[i[1],] - MTC[i[2],]) - deM_genesUsed <- sapply(colnames(deM_logFC),function(X) - which(abs(deM_logFC[,X]) > logFCthresh),simplify=F) - if (any(sapply(deM_genesUsed,length) < 1)) { - stop(paste0("logFCthresh should be set to less than ", - min(apply(deM_logFC,2,function(X) max(abs(X)))), - ", the largest magnitude logFC between clusters ", - names(which.min(apply(deM_logFC,2,function(X) max(abs(X))))),".")) - } - deM_pVal <- pbsapply(colnames(combos),function(i) - apply(eb1S@data[deM_genesUsed[[i]],],1,function(X) - suppressWarnings(wilcox.test(X[eb1S@ident == combos[1,i]], - X[eb1S@ident == combos[2,i]])$p.value)),simplify=F) - temp_deVS <- sapply(colnames(combos),function(i) - data.frame(dDR=deM_dDR[deM_genesUsed[[i]],i],logFC=deM_logFC[deM_genesUsed[[i]],i], - pVal=deM_pVal[[i]])[order(deM_pVal[[i]]),],simplify=F) - tempQval <- tapply(p.adjust(do.call(rbind,temp_deVS)$pVal,"fdr"), - rep(names(sapply(temp_deVS,nrow)),sapply(temp_deVS,nrow)),c) - for (i in names(temp_deVS)) { temp_deVS[[i]]$qVal <- tempQval[[i]] } - - deVS[[res]] <- sapply(levels(eb1S@ident),function(i) { - combos <- strsplit(names(temp_deVS),"-") - temp <- list() - for (X in seq_along(combos)) { - if (! i %in% combos[[X]]) { - next - } else if (which(combos[[X]] == i) == 1) { - temp[[combos[[X]][2]]] <- temp_deVS[[X]][temp_deVS[[X]]$logFC > 0 & - temp_deVS[[X]]$qVal < WRSTalpha,] - } else if (which(combos[[X]] == i) == 2) { - temp[[combos[[X]][1]]] <- temp_deVS[[X]][temp_deVS[[X]]$logFC < 0 & - temp_deVS[[X]]$qVal < WRSTalpha,] - temp[[combos[[X]][1]]]$dDR <- temp[[combos[[X]][1]]]$dDR * -1 - temp[[combos[[X]][1]]]$logFC <- temp[[combos[[X]][1]]]$logFC * -1 - } - } - return(temp) - },simplify=F) - - deMarker[[res]] <- sapply(deVS[[res]],function(X) { - markerGenes <- Reduce(intersect,lapply(X,rownames)) - temp <- sapply(X,function(Y) Y[markerGenes,c("dDR","logFC","qVal")],simplify=F) - names(temp) <- paste("vs",names(temp),sep=".") - return(do.call(cbind,temp)) - },simplify=F) - - #### deNeighb - DE between closest neighbouring clusters #### - nb <- apply(dist(apply(eb1S@dr$tsne@cell.embeddings,2, - function(X) tapply(X,eb1S@ident,mean)),diag=T,upper=T),2, - function(Z) names(which.min(Z[Z > 0]))) - - deNeighb[[res]] <- mapply(function(NB,VS) - VS[[NB]][,c("dDR","logFC","qVal")],NB=nb,VS=deVS[[res]],SIMPLIFY=F) - for (i in names(deNeighb[[res]])) { - colnames(deNeighb[[res]][[i]]) <- paste("vs",nb[i], - colnames(deNeighb[[res]][[i]]),sep=".") - } - minNeighbDE <- min(sapply(deNeighb[[res]],nrow)) - } - save(eb1S,CGS,deTissue,deMarker,deNeighb,deVS,deNeighb,file=paste0(dataPath,"eb1S.RData")) - } else { - #### Static visualization of clustering results #### - ## This is done seperately from the processing, because occassionally RStudio crashes - ## while making figures, and having that interrupt the slow clustering loop sucks. - load(paste0(dataPath,"eb1S.RData")) - print(paste("Data loaded from",paste0(dataPath,"eb1S.RData"))) - - - temp_max <- max(unlist(sapply(deNeighb,function(X) sapply(X,nrow)))) - for (res in grep("^res",colnames(eb1S@meta.data),value=T)) { - temp_cl <- as.factor(eb1S@meta.data[,res]) - if (length(levels(temp_cl)) <= 8) { - clustCols <- brewer.pal(length(levels(temp_cl)),"Dark2")[1:length(levels(temp_cl))] - } else { - clustCols <- rainbow2(length(levels(temp_cl))) - } - - layout(matrix(c(1,4,2,3),2),heights=c(3,1)) - par(mar=c(4,1,3,3),mgp=2:0) - tempSil <- silhouette(as.integer(temp_cl), - dist(eb1S@dr$pca@cell.embeddings[,seq(1,maxPCt)],method="euclidean")) - plot(tempSil,main=res,col=clustCols,border=NA) - - par(mar=c(3,3,1,1),mgp=2:0) - plot(eb1S@dr$tsne@cell.embeddings,pch=21,xlab="tSNE_1",ylab="tSNE_2", - col=alpha(clustCols[temp_cl],.7), - bg=alpha(clustCols[temp_cl],.3)) - text(apply(eb1S@dr$tsne@cell.embeddings,2,function(X) tapply(X,temp_cl,mean)), - labels=levels(temp_cl),font=2) - - par(mar=c(3.5,0.2,1,1)) - plot(x=NA,y=NA,xlim=c(30,temp_max+20),ylim=c(0.5,2.5), - xaxs="i",yaxt="n",frame.plot=F,ylab="",xlab="",lab=c(10,5,7)) - boxplot(cbind(sapply(deMarker[[res]],nrow), - sapply(deNeighb[[res]],nrow)), - add=T,horizontal=T,frame.plot=F,yaxt="n",xaxt="n") - abline(v=30,lty=3) - - par(mar=c(3.5,6,1,0.2)) - plot(x=NA,y=NA,xlim=c(-1,30),ylim=c(0.5,2.5), - xaxs="i",yaxt="n",frame.plot=F,ylab="",xlab="") - boxplot(cbind(sapply(deMarker[[res]],nrow), - sapply(deNeighb[[res]],nrow)), - add=T,horizontal=T,frame.plot=F,yaxt="n",xaxt="n") - abline(v=30,lty=3) - par(las=1,mgp=c(0,0,0)) - axis(2,at=1:2,labels=c("vs all clusters","vs neighbour"),lty=0) - mtext("Number of differentially expressed genes per cluster", - side=1,line=2.5,at=30,xpd=NA) - } - } - - ## [1] "Data loaded from ../scClustViz_files/demo_10Xneurons900/eb1S.RData" - -![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-1.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-2.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-3.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-4.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-5.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-6.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-7.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-8.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-9.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-10.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-11.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-12.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-13.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-14.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-15.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-16.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-17.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-18.png)![](pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-19.png) - - if (!file.exists(paste0(dataPath,eb1S@project.name,"_forViz.RData"))) { - #### Convert to scClustViz inputs #### - nge <- eb1S@data - ## ^ normalized gene expression matrix (matrix: genes x cells) - md <- eb1S@meta.data[,!grepl("^res",colnames(eb1S@meta.data))] - ## ^ metadata for cells (dataframe of cells) - if (is.data.frame(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))])) { - cl <- data.frame(lapply(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))], - as.factor)) - } else { - cl <- data.frame(eb1S@meta.data[,grepl("^res",colnames(eb1S@meta.data))]) - colnames(cl) <- grep("^res",colnames(eb1S@meta.data),value=T) - } - rownames(cl) <- rownames(md) - ## ^ cluster assignments per clustering resolution - ## (dataframe: cells x cluster labels as factors) - dr_clust <- eb1S@dr$pca@cell.embeddings[,eb1S@calc.params$RunTSNE$dims.use] - ## ^ cell embeddings in low-dimensional space used for clustering distances - ## (matrix: cells x dimensions) - ## Only including those dimensions used in downstream analysis - ## (ie. those passed to RunTSNE and FindClusters) - ## if that information is present (in calc.params). - ## Else, using all lower dimensions available. - dr_viz <- eb1S@dr$tsne@cell.embeddings - ## ^ cell embeddings in 2D space for visualization - ## (usually tSNE) (matrix: cells x coordinates) - - #### Save outputs for visualization #### - save(nge,md,cl,dr_clust,dr_viz, - CGS,deTissue,deMarker,deNeighb, - file=paste0(dataPath,eb1S@project.name,"_forViz.RData")) - ## ^ Saved objects for use in visualization script (RunVizScript.R). - } else { - print(paste("Did not overwrite",paste0(dataPath,"_forViz.RData"))) - } - - ## [1] "Did not overwrite ../scClustViz_files/demo_10Xneurons900/_forViz.RData" diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust.pdf b/ToBeConvertedToPkg/pipeline/pipeline_Clust.pdf deleted file mode 100644 index 64cdf6d..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust.pdf and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-1.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-1.png deleted file mode 100644 index f14d865..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-10.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-10.png deleted file mode 100644 index e053ae5..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-10.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-11.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-11.png deleted file mode 100644 index 63ca37b..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-11.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-12.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-12.png deleted file mode 100644 index f851a22..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-12.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-13.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-13.png deleted file mode 100644 index f37b43f..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-13.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-14.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-14.png deleted file mode 100644 index bc6857b..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-14.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-15.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-15.png deleted file mode 100644 index 3f89a39..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-15.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-16.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-16.png deleted file mode 100644 index b84c9ff..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-16.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-17.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-17.png deleted file mode 100644 index 95119a7..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-17.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-18.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-18.png deleted file mode 100644 index f65df8b..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-18.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-19.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-19.png deleted file mode 100644 index 2e9a8a8..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-19.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-2.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-2.png deleted file mode 100644 index 996de5e..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-2.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-3.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-3.png deleted file mode 100644 index 2ff3714..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-3.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-4.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-4.png deleted file mode 100644 index 04559e3..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-4.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-5.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-5.png deleted file mode 100644 index c413f46..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-5.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-6.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-6.png deleted file mode 100644 index 8586d73..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-6.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-7.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-7.png deleted file mode 100644 index 6675b6c..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-7.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-8.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-8.png deleted file mode 100644 index a7c829c..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-8.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-9.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-9.png deleted file mode 100644 index b891c16..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/ClusterAndDE-9.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/PCA_select-1.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/PCA_select-1.png deleted file mode 100644 index 9ddb0c7..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/PCA_select-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-1.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-1.png deleted file mode 100644 index f0aed00..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-2.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-2.png deleted file mode 100644 index 8f368de..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-2.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-3.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-3.png deleted file mode 100644 index b06b898..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-3.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-4.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-4.png deleted file mode 100644 index f1a0b73..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-4.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-5.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-5.png deleted file mode 100644 index 31b3809..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/plotPCs-5.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-1.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-1.png deleted file mode 100644 index 9f228c6..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-2.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-2.png deleted file mode 100644 index f4fcf1f..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-2.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-3.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-3.png deleted file mode 100644 index 9e49a52..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-3.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-4.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-4.png deleted file mode 100644 index 57a0a6a..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-4.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-5.png b/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-5.png deleted file mode 100644 index fca6a02..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_Clust_files/figure-markdown_strict/tSNE-5.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN.Rmd b/ToBeConvertedToPkg/pipeline/pipeline_QCN.Rmd deleted file mode 100644 index 602c8b9..0000000 --- a/ToBeConvertedToPkg/pipeline/pipeline_QCN.Rmd +++ /dev/null @@ -1,718 +0,0 @@ ---- -title: "QCN" -output: - md_document: - toc: yes - pdf_document: - toc: yes ---- -# Incorporating scClustViz into your analysis pipeline -In this vignette we will show how scClustViz can be incorporated into existing analysis pipelines using this example - - -## Quality control and normalization - -```{r Setup, echo=T} -input_dataPath <- "../../../scClustViz_files/neurons_900_filtered_gene_bc_matrices/filtered_gene_bc_matrices/mm10/" -## Path to 10X output directory (containing .mtx file) -## i.e. http://cf.10xgenomics.com/samples/cell-exp/2.1.0/neurons_900/neurons_900_filtered_gene_bc_matrices.tar.gz -dataPath <- "../scClustViz_files/demo_10Xneurons900/" -## Path to analysis output directory -## Will be created if it doesn't already exist -dataSpecies <- "mouse" -## Set species ("mouse"/"human" or add your own - see below) -dataName <- "10Xneurons" -## Name your analysis results - - -library(Matrix) -library(scales) -library(viridis) -library(RColorBrewer) -library(scran) # from Bioconductor -library(DropletUtils) # from Bioconductor - - -if (dataSpecies == "mouse") { - speciesMito <- "^mt-" - cycloneSpeciesMarkers <- "mouse_cycle_markers.rds" -} else if (dataSpecies == "human") { - speciesMito <- "^MT-" - cycloneSpeciesMarkers <- "human_cycle_markers.rds" -} else { print("Set species please!") } -dir.create(dataPath,recursive=T,showWarnings=F) - -plotHistHoriz <- function(input,col="grey80",add=F) { - tempH <- hist(input,breaks=50,plot=F) - if (!add) { - plot(x=NULL,y=NULL,xlim=range(tempH$counts),ylim=range(input), - bty="n",ylab=NA,yaxt="n",xaxt="n",xlab=NA) - } - rect(xleft=rep(0,length(tempH$counts)),ybottom=tempH$breaks[-length(tempH$breaks)], - xright=tempH$counts,ytop=tempH$breaks[-1],col=col) -} - -``` - -## Cell filtering - -```{r load_data_10Xchromium, echo=T} -ebRaw <- counts(read10xCounts(input_dataPath,col.names=T)) -ebRaw <- ebRaw[,Matrix::colSums(ebRaw) > 0] -ebRaw <- ebRaw[Matrix::rowSums(ebRaw) > 0,] - -mart <- useEnsembl("ensembl","mmusculus_gene_ensembl") -symbolMap <- getBM(c("ensembl_gene_id","mgi_symbol"), - filters="ensembl_gene_id",values=rownames(ebRaw),mart=mart) -mitoGenes <- symbolMap[grep(speciesMito,symbolMap$mgi_symbol),"ensembl_gene_id"] - - -## Check rownames at this point. This workflow expects MGI or HGNC gene symbols as the -## rownames. If the data was aligned to a reference genome that used ensembl gene IDs or -## another annotation you will have to either convert them to gene symbols, or identify -## which ensembl IDs correspond to the mitochondrial genome for the mitoFilt step. - -geneRowNames <- "ensembl_gene_id" # Set if rownames are ensemble gene IDs -#geneRowNames <- speciesSymbol # Set if rownames are gene symbols (no conversion). - -#### - -if (geneRowNames != speciesSymbol) { - e2g <- getBM(attributes=c(geneRowNames,speciesSymbol), - mart=mart,filters=geneRowNames, - values=rownames(ebRaw)) - if (nrow(e2g) < 10) { - stop("Check row names and select appropriate rowname identifier!") - } - ## Removing unmapped gene symbols from conversion table - e2g <- e2g[e2g[,speciesSymbol] != "",] - print(paste(sum(duplicated(e2g[,geneRowNames])), - geneRowNames,"mapped to multiple",speciesSymbol)) - ## Arbitrarily picking one mapping for the above duplicates, - ## since these generally map to predicted genes anyway. - e2g <- e2g[!duplicated(e2g[,geneRowNames]),] - rownames(e2g) <- e2g[,geneRowNames] - ebRaw <- ebRaw[e2g[,geneRowNames],] # removing unmapped genes from data - temp_repName <- unique(e2g[,speciesSymbol][duplicated(e2g[,speciesSymbol])]) - print(paste(length(temp_repName),speciesSymbol,"mapped to multiple",geneRowNames)) - ## Going to collapse these by summing UMI counts between duplicated rows. - temp_repRow <- ebRaw[e2g[,speciesSymbol] %in% temp_repName,] - ebRaw <- ebRaw[!e2g[,speciesSymbol] %in% temp_repName,] - ## Removed duplicated rows from data, saved as separate object - rownames(ebRaw) <- e2g[rownames(ebRaw),speciesSymbol] # renamed rows in data as symbols - temp_repRow <- Matrix(t(sapply(temp_repName,function(X) - Matrix::colSums(temp_repRow[e2g[,geneRowNames][e2g[,speciesSymbol] == X],]))),sparse=T) - ## Collapsed by summing each duplicated gene symbol's row - ebRaw <- rbind(ebRaw,temp_repRow) # added those data back to matrix -} - -## Consolidating duplicated gene names -## (if genes are in under their MGI and HGNC symbols, or some other annotation mixup) -if (any(duplicated(toupper(rownames(ebRaw))))) { - temp_repName <- unique(toupper(rownames(ebRaw)[duplicated(toupper(rownames(ebRaw)))])) - print(paste(dataName,"-",length(temp_repName),"duplicated gene names.")) - ## Going to collapse these by summing UMI counts between duplicated rows. - temp_repRow <- ebRaw[toupper(rownames(ebRaw)) %in% temp_repName,] - ebRaw <- ebRaw[!toupper(rownames(ebRaw)) %in% temp_repName,] - if (!exists("e2g")) { - e2g <- getBM(attributes=speciesSymbol, - mart=mart,filters=speciesSymbol, - values=rownames(temp_repRow)) - } - rownames(e2g) <- toupper(e2g$mgi_symbol) - if (any(!temp_repName %in% rownames(e2g))) { - warning(paste("Some of your duplicated rownames aren't", - speciesSymbol,"and are being removed.")) - temp_repName <- temp_repName[temp_repName %in% rownames(e2g)] - } - temp_repRow <- Matrix(t(sapply(temp_repName,function(X) - Matrix::colSums(temp_repRow[toupper(rownames(temp_repRow)) == X,]))),sparse=T) - rownames(temp_repRow) <- e2g[rownames(temp_repRow),"mgi_symbol"] - ebRaw <- rbind(ebRaw,temp_repRow) # added those data back to matrix - rm(temp_repName,temp_repRow) -} -rm(list=ls()[grepl("temp",ls())]) -``` - -```{r plot_knee2, echo=T,fig.height=4.2,fig.width=8.4} -libSize <- Matrix::colSums(ebRaw) -cumCounts <- cumsum(libSize[order(libSize,decreasing=T)]) -maxCount <- 10^ceiling(log10(max(libSize))) -countCols <- cut(log10(libSize[order(libSize,decreasing=T)]), - breaks=floor(log10(min(libSize))):ceiling(log10(max(libSize))), - labels=F,include.lowest=T) -layout(matrix(c(3,3,1:2),nrow=2,byrow=T),heights=c(1,3.2)) -par(mar=c(3,3,1,1),mgp=2:0) -plot(seq_along(cumCounts),cumCounts/max(cumCounts),pch=19,ylim=c(0,1), - col=viridis(max(countCols),d=-1)[countCols], - xlab="Cell libraries (largest to smallest)",ylab="Cumulative fraction of UMIs") -plot(seq_along(cumCounts),cumCounts/max(cumCounts),pch=19,log="x",ylim=c(0,1), - col=viridis(max(countCols),d=-1)[countCols], - xlab="Cell libraries (largest to smallest)",ylab="Cumulative fraction of UMIs") -par(mar=c(3,1,2,1)) -barplot(rep(1,max(countCols)),col=viridis(max(countCols),d=-1), - space=0,border=NA,axes=F,xlab="UMIs per cell") -axis(side=1,at=0:max(countCols), - labels=sapply(floor(log10(min(libSize))):ceiling(log10(max(libSize))), - function(X) 10^X)) -title(main="STAMPs from cells by UMIs per STAMP") -``` - -This is to check the cell filtering done in CellRanger. If you have cells with small library sizes, or a fairly horizontal line in the knee plot, you might consider revisiting the filtering for "cells" vs empty droplets. - - -### Mitochondrial gene content - -Filtering cells based on the proportion of mitochondrial gene transcripts per cell. A high proportion of mitochondrial gene transcripts are indicative of poor quality cells, probably due to compromised cell membranes. Removal of these cells should not decrease the complexity of the overall dataName (measured by number of genes detected), while removing a source of noise. - -```{r mitoFilt, echo=T,fig.height=4.2,fig.width=8.4} -### Parameters you could edit ### -drop_mitoMads <- 4 -## ^ Median absolute deviations from the median to use as -## threshold for mitochondrial transcript proprotion. -hard_mitoCut <- 0.4 -# ^ Hard threshold for mitochondrial transcript proportion - -### Calculations and filtering ### -temp_geneDetectFx <- function(ebRaw) { - if (is.null(slotNames(ebRaw))) { - apply(ebRaw,2,function(X) sum(X>0)) - } else if ("j" %in% slotNames(ebRaw)) { - as.vector(table(ebRaw@j)) - } else { - as.vector(table(rep(seq_along(diff(ebRaw@p)),diff(ebRaw@p)))) - } -} -cellStats <- data.frame( - libSize=Matrix::colSums(ebRaw), - geneDetect=temp_geneDetectFx(ebRaw), - mitoPct=Matrix::colSums(ebRaw[grepl(speciesMito,rownames(ebRaw)),]) / - Matrix::colSums(ebRaw) -) - -drop_mitoCut <- median(cellStats$mitoPct) + mad(cellStats$mitoPct) * drop_mitoMads -if (drop_mitoCut > hard_mitoCut) { drop_mitoCut <- hard_mitoCut } -drop_mito <- cellStats$mitoPct > drop_mitoCut - -ebRawF <- ebRaw[,!drop_mito] -ebRawF <- ebRawF[Matrix::rowSums(ebRawF) > 0,] -cellStatsF <- cellStats[!drop_mito,] - -### Plotting ### -layout(matrix(c(2,1,0,3,5,4,0,6),2),c(3.7,.5,3.7,.5),c(0.5,3.7)) -par(mar=c(3,3,0,0),mgp=2:0) -plot(mitoPct~libSize,data=cellStats,log="x", - pch=21,cex=1.2,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab="Transcripts detected per cell (log scale)", - ylab="Transcript proportion from mitochondrial genome") -abline(h=drop_mitoCut,lwd=2,lty=2,col=alpha("red",0.5)) -mtext(paste(dataName,"damaged cells"),side=3,adj=0.98,line=-1.5,font=2,cex=1) -legend("topright",bty="n",inset=c(0,.05), - lty=c(2,NA,NA,NA,NA),lwd=c(2,NA,NA,NA,NA),col=alpha(c("red",NA,NA,NA,NA),0.5), - legend=c(paste(drop_mitoMads,"MADs above median"), - paste(sum(drop_mito),"cells removed"), - paste(ncol(ebRawF),"cells remain"), - paste(nrow(ebRaw)-nrow(ebRawF),"genes removed"), - paste(nrow(ebRawF),"genes remain"))) -par(mar=c(0,3,0,0)) -hist(log10(cellStats$libSize),freq=F,breaks=50,col="grey80", - main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -par(mar=c(3,0,0,0)) -plotHistHoriz(cellStats$mitoPct) - -par(mar=c(3,3,0,0)) -plot(mitoPct~geneDetect,data=cellStats,log="x", - pch=21,cex=1.2,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab="Genes detected per cell (log scale)", - ylab="Transcript proportion from mitochondrial genome") -abline(h=drop_mitoCut,lwd=2,lty=2,col=alpha("red",0.5)) -mtext(paste(dataName,"damaged cells"),side=3,adj=0.98,line=-1.5,font=2,cex=1) -legend("topright",bty="n",inset=c(0,.05), - lty=c(2,NA,NA,NA,NA),lwd=c(2,NA,NA,NA,NA),col=alpha(c("red",NA,NA,NA,NA),0.5), - legend=c(paste(drop_mitoMads,"MADs above median"), - paste(sum(drop_mito),"cells removed"), - paste(ncol(ebRawF),"cells remain"), - paste(nrow(ebRaw)-nrow(ebRawF),"genes removed"), - paste(nrow(ebRawF),"genes remain"))) -par(mar=c(0,3,0,0)) -hist(log10(cellStats$geneDetect),freq=F,breaks=50,col="grey80", - main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -par(mar=c(3,0,0,0)) -plotHistHoriz(cellStats$mitoPct) -``` - -### Outlier filtering - -Doublet rate is best controlled experimentally, by reducing the concentration of the cells in the input suspension. Filtering for doublets by library size doesn't really work, since the dispersion of library sizes means that doublets containing two "small" cells will still have a smaller library size than one "large" cell. However, it is important to manually inspect the relationship between library size and gene detection rates per cell to identify obvious outliers. Outliers can be identified systematically using a sufficiently extreme number of median absolute deviations from the median, assuming a moderately normal distribution. Since library sizes tend to be log-normal, we use log-transformed library size to identify outliers. - -```{r doubFilt, echo=T,fig.height=4.2,fig.width=8.4,fig.show="hold"} -### Parameters you could edit ### -#outToInspect <- rep(F,nrow(cellStatsF)) # None -outToInspect <- with(cellStatsF,geneDetect < (libSize * 0.08 + 50)) -## Defining a line below which the curious cells lie - -# outToRemove <- rep(F,nrow(cellStatsF)) # None -numMADs <- 4 # Median absolute deviations from the median to consider as an outlier. -outToRemoveHi <- log10(cellStatsF$libSize) > median(log10(cellStatsF$libSize)) + - mad(log10(cellStats$libSize)) * numMADs -outToRemoveLo <- log10(cellStatsF$libSize) < median(log10(cellStatsF$libSize)) - - mad(log10(cellStats$libSize)) * numMADs -outToRemove <- outToRemoveHi | outToRemoveLo -## Assuming an approximately log-normal distribution of library sizes, this removes -## obvious outliers. (4 MADs from the median of the log-transformed library size) - - -### Calculations and filtering ### -ebRawF2 <- ebRawF[,!outToRemove] -ebRawF2 <- ebRawF2[Matrix::rowSums(ebRawF2) > 0,] - -### Plotting ### -layout(cbind(matrix(c(2,1,0,3),2),matrix(c(5,4,0,6),2)), - widths=c(3.5,.7,3.5,.7),heights=c(.7,3.5)) -par(mar=c(3,3,0,0),mgp=2:0) -plot(geneDetect~libSize,data=cellStatsF[!outToInspect,], - xlim=range(cellStatsF$libSize),ylim=range(cellStatsF$geneDetect), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1),cex=1.2, - xlab="Transcripts detected per cell",ylab="Genes detected per cell") -points(geneDetect~libSize,data=cellStatsF[outToInspect,], - pch=24,col=alpha("blue",0.2),bg=alpha("blue",0.1),cex=1.2) -points(geneDetect~libSize,data=cellStatsF[outToRemove,], - pch=4,cex=1.2,col="red") -mtext(paste(dataName,"cell stats"),side=3,adj=0.02,line=-1.5,font=2,cex=1) -legend("bottomright",bty="n",pch=c(24,4,NA,NA), - col=c("blue","red",NA,NA),pt.bg=alpha(c("blue",NA,NA,NA),0.3), - legend=c(paste("Outliers to inspect:",sum(outToInspect)), - paste("Outliers to remove:",sum(outToRemove)), - paste(nrow(ebRawF)-nrow(ebRawF2),"genes removed"), - paste(nrow(ebRawF2),"genes remain"))) -par(mar=c(0,3,1,0)) -hist(cellStatsF$libSize[!outToInspect | !outToRemove], - freq=T,breaks=50,col="grey80",main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -if (any(outToInspect)) { - hist(cellStatsF$libSize[outToInspect],add=T, - freq=T,breaks=50,col=alpha("blue",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -} -if (any(outToRemove)) { - hist(cellStatsF$libSize[outToRemove],add=T, - freq=T,breaks=50,col=alpha("red",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -} -par(mar=c(3,0,0,1)) -plotHistHoriz(cellStatsF$geneDetect[!outToInspect | !outToRemove]) -if (any(outToInspect)) { - plotHistHoriz(cellStatsF$geneDetect[outToInspect],col=alpha("blue",.5),add=T) -} -if (any(outToRemove)) { - plotHistHoriz(cellStatsF$geneDetect[outToRemove],col=alpha("red",.5),add=T) -} - -par(mar=c(3,3,0,0),mgp=2:0) -plot(geneDetect~libSize,data=cellStatsF[!outToInspect,],log="xy", - xlim=range(cellStatsF$libSize),ylim=range(cellStatsF$geneDetect), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1),cex=1.2, - xlab="Transcripts detected per cell (log scale)", - ylab="Genes detected per cell (log scale)") -points(geneDetect~libSize,data=cellStatsF[outToInspect,], - pch=24,col=alpha("blue",0.2),bg=alpha("blue",0.1),cex=1.2) -points(geneDetect~libSize,data=cellStatsF[outToRemove,], - pch=4,cex=1.2,col="red") -mtext(paste(dataName,"cell stats"),side=3,adj=0.02,line=-1.5,font=2,cex=1) -legend("topleft",bty="n",inset=c(0,.05),pch=c(24,4,NA,NA), - col=c("blue","red",NA,NA),pt.bg=alpha(c("blue",NA,NA,NA),0.3), - legend=c(paste("Outliers to inspect:",sum(outToInspect)), - paste("Outliers to remove:",sum(outToRemove)), - paste(nrow(ebRawF)-nrow(ebRawF2),"genes removed"), - paste(nrow(ebRawF2),"genes remain"))) -par(mar=c(0,3,1,0)) -hist(log10(cellStatsF$libSize[!outToInspect | !outToRemove]), - freq=T,breaks=50,col="grey80",main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -if (any(outToInspect)) { - hist(log10(cellStatsF$libSize[outToInspect]),add=T, - freq=T,breaks=50,col=alpha("blue",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -} -if (any(outToRemove)) { - hist(log10(cellStatsF$libSize[outToRemove]),add=T, - freq=T,breaks=50,col=alpha("red",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") -} -par(mar=c(3,0,0,1)) -plotHistHoriz(log10(cellStatsF$geneDetect[!outToInspect | !outToRemove])) -if (any(outToInspect)) { - plotHistHoriz(log10(cellStatsF$geneDetect[outToInspect]),col=alpha("blue",.5),add=T) -} -if (any(outToRemove)) { - plotHistHoriz(log10(cellStatsF$geneDetect[outToRemove]),col=alpha("red",.5),add=T) -} -``` - -It's never a bad idea to inspect libraries before removing them, or at least ensuring that by removing them you are not losing genes from the analysis (which might imply that those cells were a unique population/cell-type). - - -### Outlier inspection - -```{r what_are_those_cells, echo=T,fig.height=4.2,fig.width=8.4} -if (any(outToInspect)) { - ### Parameters you could edit ### - topNum <- 4 - ## Number of highly expressed genes in outlier population to highlight - removeOutlierPopulation <- T - ## Do you want to remove this outlier population from the analysis? - - ### Calculations ### - gsOut <- data.frame( - DR=apply(ebRawF2[,outToInspect[!outToRemove]],1,function(X) sum(X > 0)/length(X)), - MDTC=apply(ebRawF2[,outToInspect[!outToRemove]],1,function(X) mean(X[X>0])), - MTC=Matrix::rowMeans(ebRawF2[,outToInspect[!outToRemove]]) - ) - rownames(gsOut) <- rownames(ebRawF2) - gsIn <- data.frame( - DR=apply(ebRawF2[,!outToInspect[!outToRemove]],1,function(X) sum(X > 0)/length(X)), - MDTC=apply(ebRawF2[,!outToInspect[!outToRemove]],1,function(X) mean(X[X>0])), - MTC=Matrix::rowMeans(ebRawF2[,!outToInspect[!outToRemove]]) - ) - rownames(gsIn) <- rownames(ebRawF2) - if (removeOutlierPopulation) { - ebRawF3 <- ebRawF2[,!outToInspect[!outToRemove]] - ebRawF3 <- ebRawF3[Matrix::rowSums(ebRawF3) > 0,] - } else { ebRawF3 <- ebRawF2 } - topHits <- 1:nrow(gsOut) %in% head(order(gsOut$MTC,decreasing=T),topNum) - - ### Plotting ### - par(mfrow=c(1,2),mar=c(3,3,2,1),mgp=2:0) - plot(log10(MDTC)~DR,data=gsOut, - pch=21,cex=1.2,xlab="Proportion of cells detecting gene", - ylab=expression(Log[10]~"Mean non-zero gene count"), - col=alpha(c("black","red"),0.3)[topHits+1], - bg=alpha(c("black","red"),0.1)[topHits+1], - main=paste(dataName,"outlier cells")) - text(log10(MDTC)~DR,data=gsOut[topHits,], - labels=rownames(gsOut)[topHits],pos=2,col="red",cex=1.2) - if (removeOutlierPopulation) { - legend("topleft",inset=c(-.06,-.02),bty="n",cex=0.9, - legend=c(paste(ncol(ebRawF2)-ncol(ebRawF3),"cells removed"), - paste(ncol(ebRawF3),"cells remain"), - paste(nrow(ebRawF2)-nrow(ebRawF3),"genes removed"), - paste(nrow(ebRawF3),"genes remain"))) - } else { - legend("topleft",inset=c(-.06,-.02),bty="n",cex=0.9, - legend=c(paste(sum(outToInspect[!outToRemove]),"outlier cells"), - paste(sum(!outToInspect[!outToRemove]),"cells in main population"), - paste(nrow(ebRawF3) - - sum(Matrix::rowSums(ebRawF3[,!outToInspect[!outToRemove]]) > 0), - "genes unique to outliers"), - paste(sum(Matrix::rowSums(ebRawF2[,!outToInspect[!outToRemove]]) > 0), - "genes in main population"))) - } - - plot(log10(MDTC)~DR,data=gsIn, - pch=21,cex=1.2,xlab="Proportion of cells detecting gene", - ylab=expression(Log[10]~"Mean non-zero gene count"), - col=alpha(c("black","red"),0.3)[topHits+1], - bg=alpha(c("black","red"),0.1)[topHits+1], - main=paste(dataName,"main cell population")) - text(log10(MDTC)~DR,data=gsIn[topHits,], - labels=rownames(gsIn)[topHits],pos=2,col="red",cex=1.2) -} else { ebRawF3 <- ebRawF2 } -``` - -This outlier population looks to be a red blood cell or progenitor (based on the very high expression of haemoglobin, and low genetic complexity), which seems to be a common contaminant in brain single-cell data. We removed this population from downstream analysis. - - -```{r cleanup1, echo=T} -cellStatsF3 <- cellStats[colnames(ebRawF3),] -rm(list=ls()[!ls() %in% c("ebRawF3","cellStatsF3","dataName","dataPath","plotHistHoriz", - "rainbow2","mart","speciesSymbol","cycloneSpeciesMarkers", - "geneLengthPath")]) -gc() -``` - -Data processing will now be performed based on a workflow published by the Marioni group (Lun et al., F1000Research 2016; http://dx.doi.org/10.12688/f1000research.9501.2). - -```{r SCEset, echo=T} -ebRawS <- SingleCellExperiment(list(counts=ebRawF3)) -ebRawS <- scater::calculateQCMetrics(ebRawS) -colData(ebRawS)$mitoPct <- cellStatsF3$mitoPct -rm(ebRawF3,cellStatsF3) -gc() -``` - - -## Cell cycle prediction using *Cyclone* - -```{r cell_cycle_annotation, echo=T,fig.height=4.2,fig.width=8.4} -if (!file.exists(paste0(dataPath,"ebRawS.RData"))) { - g2e <- getBM(attributes=c(speciesSymbol,"ensembl_gene_id"), - mart=mart,filters=speciesSymbol, - values=rownames(ebRawS)) - cycPairs <- readRDS(system.file("exdata",cycloneSpeciesMarkers,package="scran")) - g2e <- g2e[g2e$ensembl_gene_id %in% unique((unlist(cycPairs))),] - tempCyc <- cyclone(ebRawS[g2e$mgi_symbol,], - gene.names=g2e$ensembl_gene_id,pairs=cycPairs) - tempCyc$confidence <- sapply(seq_along(tempCyc$phases),function(i) - tempCyc$normalized.scores[i,tempCyc$phases[i]]) - colnames(tempCyc$scores) <- paste0("cycScore.",colnames(tempCyc$scores)) - colnames(tempCyc$normalized.scores) <- paste0("cycScoreNorm.", - colnames(tempCyc$normalized.scores)) - colData(ebRawS) <- cbind(colData(ebRawS),tempCyc$scores,tempCyc$normalized.scores) - colData(ebRawS)$cycPhase <- factor(tempCyc$phases,levels=c("G1","S","G2M")) - colData(ebRawS)$cycConfidence <- tempCyc$confidence - - save(ebRawS,file=paste0(dataPath,"ebRawS.RData")) -} else { - load(paste0(dataPath,"ebRawS.RData")) - print(paste("Data loaded from",paste0(dataPath,"ebRawS.RData"))) -} - -layout(matrix(c(1,2,1,3,1,4),2),widths=c(2,5,1),heights=c(1,9)) -par(mar=rep(0,4),mgp=2:0) -plot.new() -title("Cell cycle phase assignment confidence, library sizes, and distribution per sample", - line=-2,cex.main=1.5) - -par(mar=c(3,3,1,1),bty="n") -boxplot(tapply(colData(ebRawS)$cycConfidence,colData(ebRawS)$cycPhase,c), - col=alpha(brewer.pal(3,"Dark2"),0.7), - ylab="Normalized score of assigned cell cycle phase") - -par(mar=c(3,3,1,1)) -cycDlibSize <- tapply(log10(colData(ebRawS)$total_counts),colData(ebRawS)$cycPhase, - function(X) density(X)) -plot(x=NULL,y=NULL,ylab="Density",xlab=expression(Log[10]~"Library Size"), - xlim=range(log10(colData(ebRawS)$total_counts)), - ylim=c(min(sapply(cycDlibSize,function(X) min(X$y))), - max(sapply(cycDlibSize,function(X) max(X$y))))) -for (x in 1:length(cycDlibSize)) { - lines(cycDlibSize[[x]],col=alpha(brewer.pal(3,"Dark2"),0.7)[x],lwd=3) -} -legend("topright",bty="n",horiz=T,lwd=rep(3,3), - col=alpha(brewer.pal(3,"Dark2"),0.7),legend=levels(colData(ebRawS)$cycPhase)) - -par(mar=c(5,3,1,1)) -barplot(cbind(table(colData(ebRawS)$cycPhase)), - col=alpha(brewer.pal(3,"Dark2"),0.7), - ylab="Proportion of cells",las=2) - -``` - -Cyclone generates individual scores for each cell cycle phase. G1 and G2/M are assigned based on these scores, and any cells not strongly scoring for either phase are assigned to S phase. -Later in the pipeline we will also attempt to predict cell cycle using the method from *Seurat*. - - -## Gene filtering - -Noisy genes must be removed to prevent them from skewing normalization. The filtering method in *Seurat* removes only genes detected in very few cells, which is sufficient for normalization while removing as few genes as possible. - -```{r geneFilt, echo=T,fig.height=6.3,fig.width=6.3,fig.show="hold"} -geneStats <- data.frame(DR=apply(counts(ebRawS),1,function(X) sum(X > 0)/length(X)), - MDTC=apply(counts(ebRawS),1,function(X) mean(X[X > 0])), - cellMax=apply(counts(ebRawS),1,max)) -drop_g <- geneStats$DR < 3/ncol(ebRawS) -ebRawSF <- ebRawS[!drop_g,] - -layout(matrix(c(2,1,0,3),2),widths=c(6,1),heights=c(1,6)) -par(mar=c(3,3,0,0),mgp=2:0) -temp_H <- cut(log10(geneStats[order(geneStats$cellMax,decreasing=F),"cellMax"]), - breaks=100,labels=F) -plot(log10(MDTC)~log10(DR),data=geneStats[order(geneStats$cellMax,decreasing=F),], - pch=21,col=viridis(100,0.5,1,0)[temp_H],bg=viridis(100,0.3,1,0)[temp_H], - xlab=expression(Log[10]~"Proportion of cells detecting gene"), - ylab=expression(Log[10]~"Mean transcript count of detected genes (MDTC)")) -points(log10(MDTC)~log10(DR),data=geneStats[drop_g,], - pch=4,col=alpha("red",0.5),cex=1.2) -legend("top",bty="n",pch=c(4,NA,NA),col=c("red",NA,NA),cex=1.1,inset=c(0,.06), - legend=c("Gene in < 3 cells", - paste("Genes removed:",sum(drop_g)), - paste("Genes remaining:",nrow(ebRawSF)))) -segments(x0=seq(quantile(range(log10(geneStats$DR)),.2), - quantile(range(log10(geneStats$DR)),.8),length.out=1000), - y0=rep(max(log10(geneStats$MDTC)) * 1.02), - y1=rep(max(log10(geneStats$MDTC))),col=viridis(1000)) -text(x=c(quantile(range(log10(geneStats$DR)),.2), - median(range(log10(geneStats$DR))), - quantile(range(log10(geneStats$DR)),.8)), - y=rep(max(log10(geneStats$MDTC)) * .98,3), - labels=c(min(geneStats$cellMax), - expression(Log[10]~bold(max)~transcript~count), - max(geneStats$cellMax)),cex=1.1) - -par(mar=c(0,3,0,0)) -hist(log10(geneStats$DR),freq=T,breaks=100,col="grey80",main=NULL,xaxt="n") -title("Gene expression distribution",line=-2,cex.main=1.5) -par(mar=c(3,0,0,0)) -barplot(hist(log10(geneStats$MDTC),breaks=100,plot=F)$counts, - horiz=T,space=0,col="grey80",main=NULL,xlab="Frequency") -``` - -```{r cleanup2, echo=T} -rm(list=ls()[!ls() %in% c("ebRawSF","dataName","dataPath","plotHistHoriz","rainbow2")]) -gc() -``` - -## Normalization - -Next step is normalization. Marioni proposed a normalization technique that attempts to generate cell-specific size factors that are robust to differential expression between genes in a heterogenous sample, unlike simple library-size normalization (https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0947-7). This method correlates strongly with library size normalization for homogenous samples, but solves a series of linear equations to deconvolute cell-specific size factors for normalization. In order to better handle heterogenous data, they suggest separating the data by simple heirarchical clustering of a Spearman correlation-based distance metric so that they can normalize the separate subpopulations separately to prevent the suppression of true differential expression during normalization. - -Normalization is carried out by assigning size factors per gene by the pooling and deconvolution method, then taking the log-ratio between each count and its size factor, and adding a pseudocount of one. Log-transforming the data stabilizes variance by reducing the impact of a few highly variable genes. - -Following this, it is suggested to investigate sources of technical variance, but without spike-ins or any annotated possible sources of variation, this step is impossible with this data. - -```{r normalize_by_deconvolution_quickCluster, echo=T} -if (!file.exists(paste0(dataPath,"ebNorm.RData"))) { - qClust <- quickCluster(ebRawSF,min.size=200) - ## Clustering of heterogenous data is suggested prior to normalization, - ## but if you have few cells or homogenous data, it may not be necessary. - names(qClust) <- colnames(ebRawSF) - ebRawSF <- computeSumFactors(ebRawSF,clusters=qClust) - ebN <- ebRawSF[,!sizeFactors(ebRawSF) <= 0] - ebN <- normalize(ebN) - naCells <- apply(exprs(ebN),2,function(X) any(is.na(X))) - if (any(naCells)) { - exprs(ebN)[,naCells] <- min(apply(exprs(ebN),1,function(X) min(X,na.rm = T))) - } - save(qClust,ebN,file=paste0(dataPath,"ebNorm.RData")) -} else { - load(paste0(dataPath,"ebNorm.RData")) - print(paste("Data loaded from",paste0(dataPath,"ebNorm.RData"))) -} - -geneStatsN <- data.frame( - DR=apply(exprs(ebN),1,function(X) sum(X > 0))/ncol(ebN), - MDTC=apply(exprs(ebN),1,function(X) mean(X[X > 0])), - MTC=Matrix::rowMeans(exprs(ebN)), - sumTC=Matrix::rowSums(exprs(ebN)), - cellMax=apply(exprs(ebN),1,max) -) - -clustCols <- rainbow2(length(levels(qClust))) -temp_randcells <- sample.int(ncol(ebN)) -``` - -```{r normalize_fig1, echo=T,fig.height=4.2,fig.width=8.4,fig.show="hold"} -temp_times <- as.factor(sub("_.+$","",colnames(ebRawSF))) -layout(matrix(c(2,1,0,3,5,4,0,6),2), - widths=c(3.5,.7,3.5,.7),heights=c(.7,3.5)) - -## GeneDetect~LibSize by QuickCluster -par(mar=c(3,3,0,0),mgp=2:0) -plot(x=log10(colData(ebRawSF)$total_counts)[temp_randcells], - y=log10(colData(ebRawSF)$total_features)[temp_randcells], - pch=21,col=alpha(clustCols,0.4)[qClust][temp_randcells], - bg=alpha(clustCols,0.2)[qClust][temp_randcells], - xlab=expression(log[10]~"Library Size"),ylab=expression(log[10]~"Genes Detected")) -points(log10(colData(ebRawSF)$total_counts)[!colnames(ebRawSF) %in% colnames(ebN)], - log10(colData(ebRawSF)$total_features)[!colnames(ebRawSF) %in% colnames(ebN)], - pch=4,col="red") -mtext("Clusters for normalization",side=3,line=-1.5,font=2) -legend("topleft",bty="n",ncol=2,inset=c(0,.05),lwd=2,pch=21, - col=alpha(clustCols[seq_along(levels(qClust))],0.3), - pt.bg=alpha(clustCols[seq_along(levels(qClust))],0.1), - legend=paste0(levels(qClust),": ",table(qClust))) -legend("bottomright",bty="n",pch=4,col="red", - legend=paste(ncol(ebRawSF) - ncol(ebN),"cells could not be normalized")) - -par(mar=c(0,3,.1,0)) -temp_density <- tapply(log10(colData(ebRawSF)$total_counts),qClust, - function(X) density(X)) -plot(x=NULL,y=NULL,xlim=range(log10(colData(ebRawSF)$total_counts)), - ylim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - xlab=NULL,ylab="Frequency",xaxt="n") -for (x in seq_along(levels(qClust))) { - lines(temp_density[[x]],lwd=2,col=alpha(clustCols,0.7)[x]) -} - -par(mar=c(3,0,0,.1)) -temp_density <- tapply(log10(colData(ebRawSF)$total_features),qClust, - function(X) density(X)) -plot(x=NULL,y=NULL,ylim=range(log10(colData(ebRawSF)$total_features)), - xlim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - ylab=NULL,xlab="Frequency",yaxt="n") -for(x in seq_along(levels(qClust))) { - lines(x=temp_density[[x]]$y,y=temp_density[[x]]$x, - lwd=2,col=alpha(clustCols,0.7)[x]) -} - -## SizeFactor~LibSize by QuickCluster -qClustF <- qClust[colnames(ebN)] -par(mar=c(3,3,0,0),mgp=2:0) -plot(x=log10(colData(ebN)$total_counts)[temp_randcells], - y=log10(sizeFactors(ebN))[temp_randcells], - pch=21,col=alpha(clustCols,0.4)[qClustF][temp_randcells], - bg=alpha(clustCols,0.2)[qClustF][temp_randcells], - xlab=expression(log[10]~"Library Size"),ylab=expression(log[10]~"Size Factor")) -mtext("Clusters for normalization",side=3,line=-1.5,font=2) -legend("bottomright",bty="n",ncol=2,inset=c(0,.03),lwd=2,pch=21, - col=alpha(clustCols[seq_along(levels(qClustF))],0.3), - pt.bg=alpha(clustCols[seq_along(levels(qClustF))],0.1), - legend=paste0(levels(qClustF),": ",table(qClustF))) -mtext(paste(ncol(ebRawSF) - ncol(ebN),"cells were not normalized"), - side=1,line=-1.1,adj=.99,cex=.8) - -par(mar=c(0,3,.1,0)) -temp_density <- tapply(log10(colData(ebN)$total_counts),qClustF, - function(X) density(X)) -plot(x=NULL,y=NULL,xlim=range(log10(colData(ebN)$total_counts)), - ylim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - xlab=NULL,ylab="Frequency",xaxt="n") -for (x in seq_along(levels(qClustF))) { - lines(temp_density[[x]],lwd=2,col=alpha(clustCols,0.7)[x]) -} - -par(mar=c(3,0,0,.1)) -temp_density <- tapply(log10(sizeFactors(ebN)),qClustF, - function(X) density(X)) -plot(x=NULL,y=NULL,ylim=range(log10(sizeFactors(ebN))), - xlim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - ylab=NULL,xlab="Frequency",yaxt="n") -for(x in seq_along(levels(qClustF))) { - lines(x=temp_density[[x]]$y,y=temp_density[[x]]$x, - lwd=2,col=alpha(clustCols,0.7)[x]) -} - -``` - -Cells that fail to normalize are generally due to poor information content (small library size, weak gene expression relative to other cells). - -```{r cleanup3, echo=T} -rm(list=ls()[!ls() %in% c("ebN","dataPath")]) -gc() -``` - -## Highly variable genes -Identification of highly variable genes is done by assuming most endogenous genes are not variably expressed, and fitting a curve to these genes when comparing variance to mean. This curve is presumed to represent technical variation, and thus highly variable genes are those with variance significantly greater than this curve. The method used here from *scran* isn’t that different in logic to the *Seurat* method, but fitting a spline is just a little more refined way of going about it than Seurat’s binning method. - -```{r HVG2, echo=T,fig.height=6.3,fig.width=8.4} -var.fit <- trendVar(exprs(ebN),method="loess",parametric=T) -var.out <- decomposeVar(exprs(ebN),var.fit) -bioCut <- 0 -bioCutFDR <- 1e-2 -hvg <- var.out[which(var.out$FDR <= bioCutFDR & var.out$bio >= bioCut),] -hvg <- hvg[order(hvg$bio,decreasing=T),] - -par(mar=c(3,3,3,1),mgp=2:0) -plot(total~mean, data=var.out[!rownames(var.out) %in% rownames(hvg),], - ylim=range(var.out$total),xlim=range(var.out$mean), - pch=21,col=alpha("black",0.3),bg=alpha("black",0.1), - xlab="Mean log-expression",ylab="Variance of log-expression") -points(total~mean, data=var.out[rownames(var.out) %in% rownames(hvg),], - pch=21,col=alpha("red",0.3),bg=alpha("red",0.1)) -lines(var.out$mean[order(var.out$mean)],var.out$tech[order(var.out$mean)], - col=alpha("red",0.5),lwd=2) -text(total~mean,data=var.out[rownames(hvg[1:10,]),], - labels=rownames(hvg[1:10,]),pos=4,col=alpha("red",0.5)) -legend("top",bty="n",inset=c(0,-.12),ncol=2,xpd=NA,lwd=c(2,NA,NA),pch=c(NA,21,NA), - col=alpha(c("red","red",NA),0.5),pt.bg=alpha(c(NA,"red",NA),0.3), - legend=c("Predicted technical variance", - paste("Biological variance > 0 at FDR <=",bioCutFDR), - paste(nrow(hvg),"/",nrow(ebN),"highly variable genes"))) -``` - -```{r output_for_clustering, echo=T} -ebNorm <- exprs(ebN) -pDat <- cbind( - colData(ebN)[,c("total_features","total_counts")], - colData(ebN)[,which(colnames(colData(ebN)) == "mitoPct"):ncol(colData(ebN))] -) -save(ebNorm,pDat,hvg,file=paste0(dataPath,"clustInputs.RData")) - -## Writing the normalized matrix as a .csv in case you want to use it for other things -## (ie. loading in Python) -write.csv(as.matrix(exprs(ebN)),file=paste0(dataPath,"ebNorm.csv"),quote=F) -``` diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN.md b/ToBeConvertedToPkg/pipeline/pipeline_QCN.md deleted file mode 100644 index d7fc539..0000000 --- a/ToBeConvertedToPkg/pipeline/pipeline_QCN.md +++ /dev/null @@ -1,905 +0,0 @@ -### [Home](/scClustViz) -- [Quality control and - normalization](#quality-control-and-normalization) -- [Cell filtering](#cell-filtering) - - [Mitochondrial gene content](#mitochondrial-gene-content) - - [Outlier filtering](#outlier-filtering) - - [Outlier inspection](#outlier-inspection) - - [Cell cycle prediction using - *Cyclone*](#cell-cycle-prediction-using-cyclone) - - [Filter out low abundance - genes](#filter-out-low-abundance-genes) -- [Normalization](#normalization) -- [Highly variable genes](#highly-variable-genes) - -Quality control and normalization -================================= - - input_dataPath <- "../scClustViz_files/neurons_900_filtered_gene_bc_matrices/filtered_gene_bc_matrices/mm10/" - ## Path to 10X output directory (containing .mtx file) - ## i.e. http://cf.10xgenomics.com/samples/cell-exp/2.1.0/neurons_900/neurons_900_filtered_gene_bc_matrices.tar.gz - dataPath <- "../scClustViz_files/demo_10Xneurons900/" - ## Path to analysis output directory - ## Will be created if it doesn't already exist - dataSpecies <- "mouse" - ## Set species ("mouse"/"human" or add your own - see below) - dataName <- "10Xneurons" - ## Name your analysis results - - - library(Matrix) - library(scales) - library(viridis) - - ## Loading required package: viridisLite - - ## - ## Attaching package: 'viridis' - - ## The following object is masked from 'package:scales': - ## - ## viridis_pal - - library(RColorBrewer) - library(biomaRt) # from Bioconductor - library(scran) # from Bioconductor - - ## Loading required package: BiocParallel - - ## Loading required package: SingleCellExperiment - - ## Loading required package: SummarizedExperiment - - ## Loading required package: GenomicRanges - - ## Loading required package: stats4 - - ## Loading required package: BiocGenerics - - ## Loading required package: parallel - - ## - ## Attaching package: 'BiocGenerics' - - ## The following objects are masked from 'package:parallel': - ## - ## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, - ## clusterExport, clusterMap, parApply, parCapply, parLapply, - ## parLapplyLB, parRapply, parSapply, parSapplyLB - - ## The following objects are masked from 'package:Matrix': - ## - ## colMeans, colSums, rowMeans, rowSums, which - - ## The following objects are masked from 'package:stats': - ## - ## IQR, mad, sd, var, xtabs - - ## The following objects are masked from 'package:base': - ## - ## anyDuplicated, append, as.data.frame, basename, cbind, - ## colMeans, colnames, colSums, dirname, do.call, duplicated, - ## eval, evalq, Filter, Find, get, grep, grepl, intersect, - ## is.unsorted, lapply, lengths, Map, mapply, match, mget, order, - ## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, - ## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, - ## table, tapply, union, unique, unsplit, which, which.max, - ## which.min - - ## Loading required package: S4Vectors - - ## - ## Attaching package: 'S4Vectors' - - ## The following object is masked from 'package:Matrix': - ## - ## expand - - ## The following object is masked from 'package:base': - ## - ## expand.grid - - ## Loading required package: IRanges - - ## - ## Attaching package: 'IRanges' - - ## The following object is masked from 'package:grDevices': - ## - ## windows - - ## Loading required package: GenomeInfoDb - - ## Loading required package: Biobase - - ## Welcome to Bioconductor - ## - ## Vignettes contain introductory material; view with - ## 'browseVignettes()'. To cite Bioconductor, see - ## 'citation("Biobase")', and for packages 'citation("pkgname")'. - - ## Loading required package: DelayedArray - - ## Loading required package: matrixStats - - ## - ## Attaching package: 'matrixStats' - - ## The following objects are masked from 'package:Biobase': - ## - ## anyMissing, rowMedians - - ## - ## Attaching package: 'DelayedArray' - - ## The following objects are masked from 'package:matrixStats': - ## - ## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges - - ## The following objects are masked from 'package:base': - ## - ## aperm, apply - - library(DropletUtils) # from Bioconductor - - - if (Sys.info()["sysname"] == "Windows") { sys <- "D:/" } else { sys <- "~/" } - if (dataSpecies == "mouse") { - mart <- useMart("ensembl","mmusculus_gene_ensembl") - speciesSymbol <- "mgi_symbol" - speciesMito <- "^mt-" - cycloneSpeciesMarkers <- "mouse_cycle_markers.rds" - } else if (dataSpecies == "human") { - mart <- useMart("ensembl","hsapiens_gene_ensembl") - speciesSymbol <- "hgnc_symbol" - speciesMito <- "^MT-" - cycloneSpeciesMarkers <- "human_cycle_markers.rds" - } else { print("Set species please!") } - dir.create(dataPath,recursive=T,showWarnings=F) - - plotHistHoriz <- function(input,col="grey80",add=F) { - tempH <- hist(input,breaks=50,plot=F) - if (!add) { - plot(x=NULL,y=NULL,xlim=range(tempH$counts),ylim=range(input), - bty="n",ylab=NA,yaxt="n",xaxt="n",xlab=NA) - } - rect(xleft=rep(0,length(tempH$counts)),ybottom=tempH$breaks[-length(tempH$breaks)], - xright=tempH$counts,ytop=tempH$breaks[-1],col=col) - } - - rainbow2 <- function(n,a=1) { - require(scales) - hues = seq(15, 375, length = n + 1) - alpha(hcl(h = hues, l = 60, c = 100)[1:n],a) - } - -Cell filtering -============== - - ebRaw <- counts(read10xCounts(input_dataPath,col.names=T)) - ebRaw <- ebRaw[,Matrix::colSums(ebRaw) > 0] - ebRaw <- ebRaw[Matrix::rowSums(ebRaw) > 0,] - - ## Check rownames at this point. This workflow expects MGI or HGNC gene symbols as the - ## rownames. If the data was aligned to a reference genome that used ensembl gene IDs or - ## another annotation you will have to either convert them to gene symbols, or identify - ## which ensembl IDs correspond to the mitochondrial genome for the mitoFilt step. - - geneRowNames <- "ensembl_gene_id" # Set if rownames are ensemble gene IDs - #geneRowNames <- speciesSymbol # Set if rownames are gene symbols (no conversion). - - #### - - if (geneRowNames != speciesSymbol) { - e2g <- getBM(attributes=c(geneRowNames,speciesSymbol), - mart=mart,filters=geneRowNames, - values=rownames(ebRaw)) - if (nrow(e2g) < 10) { - stop("Check row names and select appropriate rowname identifier!") - } - ## Removing unmapped gene symbols from conversion table - e2g <- e2g[e2g[,speciesSymbol] != "",] - print(paste(sum(duplicated(e2g[,geneRowNames])), - geneRowNames,"mapped to multiple",speciesSymbol)) - ## Arbitrarily picking one mapping for the above duplicates, - ## since these generally map to predicted genes anyway. - e2g <- e2g[!duplicated(e2g[,geneRowNames]),] - rownames(e2g) <- e2g[,geneRowNames] - ebRaw <- ebRaw[e2g[,geneRowNames],] # removing unmapped genes from data - temp_repName <- unique(e2g[,speciesSymbol][duplicated(e2g[,speciesSymbol])]) - print(paste(length(temp_repName),speciesSymbol,"mapped to multiple",geneRowNames)) - ## Going to collapse these by summing UMI counts between duplicated rows. - temp_repRow <- ebRaw[e2g[,speciesSymbol] %in% temp_repName,] - ebRaw <- ebRaw[!e2g[,speciesSymbol] %in% temp_repName,] - ## Removed duplicated rows from data, saved as separate object - rownames(ebRaw) <- e2g[rownames(ebRaw),speciesSymbol] # renamed rows in data as symbols - temp_repRow <- Matrix(t(sapply(temp_repName,function(X) - Matrix::colSums(temp_repRow[e2g[,geneRowNames][e2g[,speciesSymbol] == X],]))),sparse=T) - ## Collapsed by summing each duplicated gene symbol's row - ebRaw <- rbind(ebRaw,temp_repRow) # added those data back to matrix - } - - ## [1] "0 ensembl_gene_id mapped to multiple mgi_symbol" - ## [1] "12 mgi_symbol mapped to multiple ensembl_gene_id" - - ## Consolidating duplicated gene names - ## (if genes are in under their MGI and HGNC symbols, or some other annotation mixup) - if (any(duplicated(toupper(rownames(ebRaw))))) { - temp_repName <- unique(toupper(rownames(ebRaw)[duplicated(toupper(rownames(ebRaw)))])) - print(paste(dataName,"-",length(temp_repName),"duplicated gene names.")) - ## Going to collapse these by summing UMI counts between duplicated rows. - temp_repRow <- ebRaw[toupper(rownames(ebRaw)) %in% temp_repName,] - ebRaw <- ebRaw[!toupper(rownames(ebRaw)) %in% temp_repName,] - if (!exists("e2g")) { - e2g <- getBM(attributes=speciesSymbol, - mart=mart,filters=speciesSymbol, - values=rownames(temp_repRow)) - } - rownames(e2g) <- toupper(e2g$mgi_symbol) - if (any(!temp_repName %in% rownames(e2g))) { - warning(paste("Some of your duplicated rownames aren't", - speciesSymbol,"and are being removed.")) - temp_repName <- temp_repName[temp_repName %in% rownames(e2g)] - } - temp_repRow <- Matrix(t(sapply(temp_repName,function(X) - Matrix::colSums(temp_repRow[toupper(rownames(temp_repRow)) == X,]))),sparse=T) - rownames(temp_repRow) <- e2g[rownames(temp_repRow),"mgi_symbol"] - ebRaw <- rbind(ebRaw,temp_repRow) # added those data back to matrix - rm(temp_repName,temp_repRow) - } - rm(list=ls()[grepl("temp",ls())]) - - libSize <- Matrix::colSums(ebRaw) - cumCounts <- cumsum(libSize[order(libSize,decreasing=T)]) - maxCount <- 10^ceiling(log10(max(libSize))) - countCols <- cut(log10(libSize[order(libSize,decreasing=T)]), - breaks=floor(log10(min(libSize))):ceiling(log10(max(libSize))), - labels=F,include.lowest=T) - layout(matrix(c(3,3,1:2),nrow=2,byrow=T),heights=c(1,3.2)) - par(mar=c(3,3,1,1),mgp=2:0) - plot(seq_along(cumCounts),cumCounts/max(cumCounts),pch=19,ylim=c(0,1), - col=viridis(max(countCols),d=-1)[countCols], - xlab="Cell libraries (largest to smallest)",ylab="Cumulative fraction of UMIs") - plot(seq_along(cumCounts),cumCounts/max(cumCounts),pch=19,log="x",ylim=c(0,1), - col=viridis(max(countCols),d=-1)[countCols], - xlab="Cell libraries (largest to smallest)",ylab="Cumulative fraction of UMIs") - par(mar=c(3,1,2,1)) - barplot(rep(1,max(countCols)),col=viridis(max(countCols),d=-1), - space=0,border=NA,axes=F,xlab="UMIs per cell") - axis(side=1,at=0:max(countCols), - labels=sapply(floor(log10(min(libSize))):ceiling(log10(max(libSize))), - function(X) 10^X)) - title(main="STAMPs from cells by UMIs per STAMP") - -![](pipeline_QCN_files/figure-markdown_strict/plot_knee2-1.png) - -This is to check the cell filtering done in CellRanger. If you have -cells with small library sizes, or a fairly horizontal line in the knee -plot, you might consider revisiting the filtering for "cells" vs empty -droplets. - -Mitochondrial gene content --------------------------- - -Filtering cells based on the proportion of mitochondrial gene -transcripts per cell. A high proportion of mitochondrial gene -transcripts are indicative of poor quality cells, probably due to -compromised cell membranes. Removal of these cells should not decrease -the complexity of the overall dataName (measured by number of genes -detected), while removing a source of noise. - - ### Parameters you could edit ### - drop_mitoMads <- 4 - ## ^ Median absolute deviations from the median to use as - ## threshold for mitochondrial transcript proprotion. - hard_mitoCut <- 0.4 - # ^ Hard threshold for mitochondrial transcript proportion - - ### Calculations and filtering ### - temp_geneDetectFx <- function(ebRaw) { - if (is.null(slotNames(ebRaw))) { - apply(ebRaw,2,function(X) sum(X>0)) - } else if ("j" %in% slotNames(ebRaw)) { - as.vector(table(ebRaw@j)) - } else { - as.vector(table(rep(seq_along(diff(ebRaw@p)),diff(ebRaw@p)))) - } - } - cellStats <- data.frame( - libSize=Matrix::colSums(ebRaw), - geneDetect=temp_geneDetectFx(ebRaw), - mitoPct=Matrix::colSums(ebRaw[grepl(speciesMito,rownames(ebRaw)),]) / - Matrix::colSums(ebRaw) - ) - - drop_mitoCut <- median(cellStats$mitoPct) + mad(cellStats$mitoPct) * drop_mitoMads - if (drop_mitoCut > hard_mitoCut) { drop_mitoCut <- hard_mitoCut } - drop_mito <- cellStats$mitoPct > drop_mitoCut - - ebRawF <- ebRaw[,!drop_mito] - ebRawF <- ebRawF[Matrix::rowSums(ebRawF) > 0,] - cellStatsF <- cellStats[!drop_mito,] - - ### Plotting ### - layout(matrix(c(2,1,0,3,5,4,0,6),2),c(3.7,.5,3.7,.5),c(0.5,3.7)) - par(mar=c(3,3,0,0),mgp=2:0) - plot(mitoPct~libSize,data=cellStats,log="x", - pch=21,cex=1.2,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab="Transcripts detected per cell (log scale)", - ylab="Transcript proportion from mitochondrial genome") - abline(h=drop_mitoCut,lwd=2,lty=2,col=alpha("red",0.5)) - mtext(paste(dataName,"damaged cells"),side=3,adj=0.98,line=-1.5,font=2,cex=1) - legend("topright",bty="n",inset=c(0,.05), - lty=c(2,NA,NA,NA,NA),lwd=c(2,NA,NA,NA,NA),col=alpha(c("red",NA,NA,NA,NA),0.5), - legend=c(paste(drop_mitoMads,"MADs above median"), - paste(sum(drop_mito),"cells removed"), - paste(ncol(ebRawF),"cells remain"), - paste(nrow(ebRaw)-nrow(ebRawF),"genes removed"), - paste(nrow(ebRawF),"genes remain"))) - par(mar=c(0,3,0,0)) - hist(log10(cellStats$libSize),freq=F,breaks=50,col="grey80", - main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - par(mar=c(3,0,0,0)) - plotHistHoriz(cellStats$mitoPct) - - par(mar=c(3,3,0,0)) - plot(mitoPct~geneDetect,data=cellStats,log="x", - pch=21,cex=1.2,col=alpha("black",0.2),bg=alpha("black",0.1), - xlab="Genes detected per cell (log scale)", - ylab="Transcript proportion from mitochondrial genome") - abline(h=drop_mitoCut,lwd=2,lty=2,col=alpha("red",0.5)) - mtext(paste(dataName,"damaged cells"),side=3,adj=0.98,line=-1.5,font=2,cex=1) - legend("topright",bty="n",inset=c(0,.05), - lty=c(2,NA,NA,NA,NA),lwd=c(2,NA,NA,NA,NA),col=alpha(c("red",NA,NA,NA,NA),0.5), - legend=c(paste(drop_mitoMads,"MADs above median"), - paste(sum(drop_mito),"cells removed"), - paste(ncol(ebRawF),"cells remain"), - paste(nrow(ebRaw)-nrow(ebRawF),"genes removed"), - paste(nrow(ebRawF),"genes remain"))) - par(mar=c(0,3,0,0)) - hist(log10(cellStats$geneDetect),freq=F,breaks=50,col="grey80", - main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - par(mar=c(3,0,0,0)) - plotHistHoriz(cellStats$mitoPct) - -![](pipeline_QCN_files/figure-markdown_strict/mitoFilt-1.png) - -Outlier filtering ------------------ - -Doublet rate is best controlled experimentally, by reducing the -concentration of the cells in the input suspension. Filtering for -doublets by library size doesn't really work, since the dispersion of -library sizes means that doublets containing two "small" cells will -still have a smaller library size than one "large" cell. However, it is -important to manually inspect the relationship between library size and -gene detection rates per cell to identify obvious outliers. Outliers can -be identified systematically using a sufficiently extreme number of -median absolute deviations from the median, assuming a moderately normal -distribution. Since library sizes tend to be log-normal, we use -log-transformed library size to identify outliers. - - ### Parameters you could edit ### - #outToInspect <- rep(F,nrow(cellStatsF)) # None - outToInspect <- with(cellStatsF,geneDetect < (libSize * 0.08 + 50)) - ## Defining a line below which the curious cells lie - - # outToRemove <- rep(F,nrow(cellStatsF)) # None - numMADs <- 4 # Median absolute deviations from the median to consider as an outlier. - outToRemoveHi <- log10(cellStatsF$libSize) > median(log10(cellStatsF$libSize)) + - mad(log10(cellStats$libSize)) * numMADs - outToRemoveLo <- log10(cellStatsF$libSize) < median(log10(cellStatsF$libSize)) - - mad(log10(cellStats$libSize)) * numMADs - outToRemove <- outToRemoveHi | outToRemoveLo - ## Assuming an approximately log-normal distribution of library sizes, this removes - ## obvious outliers. (4 MADs from the median of the log-transformed library size) - - - ### Calculations and filtering ### - ebRawF2 <- ebRawF[,!outToRemove] - ebRawF2 <- ebRawF2[Matrix::rowSums(ebRawF2) > 0,] - - ### Plotting ### - layout(cbind(matrix(c(2,1,0,3),2),matrix(c(5,4,0,6),2)), - widths=c(3.5,.7,3.5,.7),heights=c(.7,3.5)) - par(mar=c(3,3,0,0),mgp=2:0) - plot(geneDetect~libSize,data=cellStatsF[!outToInspect,], - xlim=range(cellStatsF$libSize),ylim=range(cellStatsF$geneDetect), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1),cex=1.2, - xlab="Transcripts detected per cell",ylab="Genes detected per cell") - points(geneDetect~libSize,data=cellStatsF[outToInspect,], - pch=24,col=alpha("blue",0.2),bg=alpha("blue",0.1),cex=1.2) - points(geneDetect~libSize,data=cellStatsF[outToRemove,], - pch=4,cex=1.2,col="red") - mtext(paste(dataName,"cell stats"),side=3,adj=0.02,line=-1.5,font=2,cex=1) - legend("bottomright",bty="n",pch=c(24,4,NA,NA), - col=c("blue","red",NA,NA),pt.bg=alpha(c("blue",NA,NA,NA),0.3), - legend=c(paste("Outliers to inspect:",sum(outToInspect)), - paste("Outliers to remove:",sum(outToRemove)), - paste(nrow(ebRawF)-nrow(ebRawF2),"genes removed"), - paste(nrow(ebRawF2),"genes remain"))) - par(mar=c(0,3,1,0)) - hist(cellStatsF$libSize[!outToInspect | !outToRemove], - freq=T,breaks=50,col="grey80",main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - if (any(outToInspect)) { - hist(cellStatsF$libSize[outToInspect],add=T, - freq=T,breaks=50,col=alpha("blue",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - } - if (any(outToRemove)) { - hist(cellStatsF$libSize[outToRemove],add=T, - freq=T,breaks=50,col=alpha("red",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - } - par(mar=c(3,0,0,1)) - plotHistHoriz(cellStatsF$geneDetect[!outToInspect | !outToRemove]) - if (any(outToInspect)) { - plotHistHoriz(cellStatsF$geneDetect[outToInspect],col=alpha("blue",.5),add=T) - } - if (any(outToRemove)) { - plotHistHoriz(cellStatsF$geneDetect[outToRemove],col=alpha("red",.5),add=T) - } - - par(mar=c(3,3,0,0),mgp=2:0) - plot(geneDetect~libSize,data=cellStatsF[!outToInspect,],log="xy", - xlim=range(cellStatsF$libSize),ylim=range(cellStatsF$geneDetect), - pch=21,col=alpha("black",0.2),bg=alpha("black",0.1),cex=1.2, - xlab="Transcripts detected per cell (log scale)", - ylab="Genes detected per cell (log scale)") - points(geneDetect~libSize,data=cellStatsF[outToInspect,], - pch=24,col=alpha("blue",0.2),bg=alpha("blue",0.1),cex=1.2) - points(geneDetect~libSize,data=cellStatsF[outToRemove,], - pch=4,cex=1.2,col="red") - mtext(paste(dataName,"cell stats"),side=3,adj=0.02,line=-1.5,font=2,cex=1) - legend("topleft",bty="n",inset=c(0,.05),pch=c(24,4,NA,NA), - col=c("blue","red",NA,NA),pt.bg=alpha(c("blue",NA,NA,NA),0.3), - legend=c(paste("Outliers to inspect:",sum(outToInspect)), - paste("Outliers to remove:",sum(outToRemove)), - paste(nrow(ebRawF)-nrow(ebRawF2),"genes removed"), - paste(nrow(ebRawF2),"genes remain"))) - par(mar=c(0,3,1,0)) - hist(log10(cellStatsF$libSize[!outToInspect | !outToRemove]), - freq=T,breaks=50,col="grey80",main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - if (any(outToInspect)) { - hist(log10(cellStatsF$libSize[outToInspect]),add=T, - freq=T,breaks=50,col=alpha("blue",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - } - if (any(outToRemove)) { - hist(log10(cellStatsF$libSize[outToRemove]),add=T, - freq=T,breaks=50,col=alpha("red",.5),main=NULL,xlab=NA,ylab=NA,xaxt="n",yaxt="n") - } - par(mar=c(3,0,0,1)) - plotHistHoriz(log10(cellStatsF$geneDetect[!outToInspect | !outToRemove])) - if (any(outToInspect)) { - plotHistHoriz(log10(cellStatsF$geneDetect[outToInspect]),col=alpha("blue",.5),add=T) - } - if (any(outToRemove)) { - plotHistHoriz(log10(cellStatsF$geneDetect[outToRemove]),col=alpha("red",.5),add=T) - } - -![](pipeline_QCN_files/figure-markdown_strict/doubFilt-1.png) - -It's never a bad idea to inspect libraries before removing them, or at -least ensuring that by removing them you are not losing genes from the -analysis (which might imply that those cells were a unique -population/cell-type). - -Outlier inspection ------------------- - - if (any(outToInspect)) { - ### Parameters you could edit ### - topNum <- 4 - ## Number of highly expressed genes in outlier population to highlight - removeOutlierPopulation <- T - ## Do you want to remove this outlier population from the analysis? - - ### Calculations ### - gsOut <- data.frame( - DR=apply(ebRawF2[,outToInspect[!outToRemove]],1,function(X) sum(X > 0)/length(X)), - MDTC=apply(ebRawF2[,outToInspect[!outToRemove]],1,function(X) mean(X[X>0])), - MTC=Matrix::rowMeans(ebRawF2[,outToInspect[!outToRemove]]) - ) - rownames(gsOut) <- rownames(ebRawF2) - gsIn <- data.frame( - DR=apply(ebRawF2[,!outToInspect[!outToRemove]],1,function(X) sum(X > 0)/length(X)), - MDTC=apply(ebRawF2[,!outToInspect[!outToRemove]],1,function(X) mean(X[X>0])), - MTC=Matrix::rowMeans(ebRawF2[,!outToInspect[!outToRemove]]) - ) - rownames(gsIn) <- rownames(ebRawF2) - if (removeOutlierPopulation) { - ebRawF3 <- ebRawF2[,!outToInspect[!outToRemove]] - ebRawF3 <- ebRawF3[Matrix::rowSums(ebRawF3) > 0,] - } else { ebRawF3 <- ebRawF2 } - topHits <- 1:nrow(gsOut) %in% head(order(gsOut$MTC,decreasing=T),topNum) - - ### Plotting ### - par(mfrow=c(1,2),mar=c(3,3,2,1),mgp=2:0) - plot(log10(MDTC)~DR,data=gsOut, - pch=21,cex=1.2,xlab="Proportion of cells detecting gene", - ylab=expression(Log[10]~"Mean non-zero gene count"), - col=alpha(c("black","red"),0.3)[topHits+1], - bg=alpha(c("black","red"),0.1)[topHits+1], - main=paste(dataName,"outlier cells")) - text(log10(MDTC)~DR,data=gsOut[topHits,], - labels=rownames(gsOut)[topHits],pos=2,col="red",cex=1.2) - if (removeOutlierPopulation) { - legend("topleft",inset=c(-.06,-.02),bty="n",cex=0.9, - legend=c(paste(ncol(ebRawF2)-ncol(ebRawF3),"cells removed"), - paste(ncol(ebRawF3),"cells remain"), - paste(nrow(ebRawF2)-nrow(ebRawF3),"genes removed"), - paste(nrow(ebRawF3),"genes remain"))) - } else { - legend("topleft",inset=c(-.06,-.02),bty="n",cex=0.9, - legend=c(paste(sum(outToInspect[!outToRemove]),"outlier cells"), - paste(sum(!outToInspect[!outToRemove]),"cells in main population"), - paste(nrow(ebRawF3) - - sum(Matrix::rowSums(ebRawF3[,!outToInspect[!outToRemove]]) > 0), - "genes unique to outliers"), - paste(sum(Matrix::rowSums(ebRawF2[,!outToInspect[!outToRemove]]) > 0), - "genes in main population"))) - } - - plot(log10(MDTC)~DR,data=gsIn, - pch=21,cex=1.2,xlab="Proportion of cells detecting gene", - ylab=expression(Log[10]~"Mean non-zero gene count"), - col=alpha(c("black","red"),0.3)[topHits+1], - bg=alpha(c("black","red"),0.1)[topHits+1], - main=paste(dataName,"main cell population")) - text(log10(MDTC)~DR,data=gsIn[topHits,], - labels=rownames(gsIn)[topHits],pos=2,col="red",cex=1.2) - } else { ebRawF3 <- ebRawF2 } - -![](pipeline_QCN_files/figure-markdown_strict/what_are_those_cells-1.png) - -This outlier population looks to be a red blood cell or progenitor -(based on the very high expression of haemoglobin, and low genetic -complexity), which seems to be a common contaminant in brain single-cell -data. We removed this population from downstream analysis. - - cellStatsF3 <- cellStats[colnames(ebRawF3),] - rm(list=ls()[!ls() %in% c("ebRawF3","cellStatsF3","dataName","dataPath","plotHistHoriz", - "rainbow2","mart","speciesSymbol","cycloneSpeciesMarkers", - "geneLengthPath")]) - gc() - - ## used (Mb) gc trigger (Mb) max used (Mb) - ## Ncells 5491585 293.3 8442490 450.9 8442490 450.9 - ## Vcells 13870612 105.9 65656106 501.0 82069840 626.2 - -Data processing will now be performed based on a workflow published by -the Marioni group (Lun et al., F1000Research 2016; -). - - ebRawS <- SingleCellExperiment(list(counts=ebRawF3)) - ebRawS <- scater::calculateQCMetrics(ebRawS) - - ## Note that the names of some metrics have changed, see 'Renamed metrics' in ?calculateQCMetrics. - ## Old names are currently maintained for back-compatibility, but may be removed in future releases. - - colData(ebRawS)$mitoPct <- cellStatsF3$mitoPct - rm(ebRawF3,cellStatsF3) - gc() - - ## used (Mb) gc trigger (Mb) max used (Mb) - ## Ncells 5497910 293.7 8442490 450.9 8442490 450.9 - ## Vcells 13994074 106.8 52524884 400.8 82069840 626.2 - -Cell cycle prediction using *Cyclone* -------------------------------------- - - if (!file.exists(paste0(dataPath,"ebRawS.RData"))) { - g2e <- getBM(attributes=c(speciesSymbol,"ensembl_gene_id"), - mart=mart,filters=speciesSymbol, - values=rownames(ebRawS)) - cycPairs <- readRDS(system.file("exdata",cycloneSpeciesMarkers,package="scran")) - g2e <- g2e[g2e$ensembl_gene_id %in% unique((unlist(cycPairs))),] - tempCyc <- cyclone(ebRawS[g2e$mgi_symbol,], - gene.names=g2e$ensembl_gene_id,pairs=cycPairs) - tempCyc$confidence <- sapply(seq_along(tempCyc$phases),function(i) - tempCyc$normalized.scores[i,tempCyc$phases[i]]) - colnames(tempCyc$scores) <- paste0("cycScore.",colnames(tempCyc$scores)) - colnames(tempCyc$normalized.scores) <- paste0("cycScoreNorm.", - colnames(tempCyc$normalized.scores)) - colData(ebRawS) <- cbind(colData(ebRawS),tempCyc$scores,tempCyc$normalized.scores) - colData(ebRawS)$cycPhase <- factor(tempCyc$phases,levels=c("G1","S","G2M")) - colData(ebRawS)$cycConfidence <- tempCyc$confidence - - save(ebRawS,file=paste0(dataPath,"ebRawS.RData")) - } else { - load(paste0(dataPath,"ebRawS.RData")) - print(paste("Data loaded from",paste0(dataPath,"ebRawS.RData"))) - } - - ## [1] "Data loaded from ../scClustViz_files/demo_10Xneurons900/ebRawS.RData" - - layout(matrix(c(1,2,1,3,1,4),2),widths=c(2,5,1),heights=c(1,9)) - par(mar=rep(0,4),mgp=2:0) - plot.new() - title("Cell cycle phase assignment confidence, library sizes, and distribution per sample", - line=-2,cex.main=1.5) - - par(mar=c(3,3,1,1),bty="n") - boxplot(tapply(colData(ebRawS)$cycConfidence,colData(ebRawS)$cycPhase,c), - col=alpha(brewer.pal(3,"Dark2"),0.7), - ylab="Normalized score of assigned cell cycle phase") - - par(mar=c(3,3,1,1)) - cycDlibSize <- tapply(log10(colData(ebRawS)$total_counts),colData(ebRawS)$cycPhase, - function(X) density(X)) - plot(x=NULL,y=NULL,ylab="Density",xlab=expression(Log[10]~"Library Size"), - xlim=range(log10(colData(ebRawS)$total_counts)), - ylim=c(min(sapply(cycDlibSize,function(X) min(X$y))), - max(sapply(cycDlibSize,function(X) max(X$y))))) - for (x in 1:length(cycDlibSize)) { - lines(cycDlibSize[[x]],col=alpha(brewer.pal(3,"Dark2"),0.7)[x],lwd=3) - } - legend("topright",bty="n",horiz=T,lwd=rep(3,3), - col=alpha(brewer.pal(3,"Dark2"),0.7),legend=levels(colData(ebRawS)$cycPhase)) - - par(mar=c(5,3,1,1)) - barplot(cbind(table(colData(ebRawS)$cycPhase)), - col=alpha(brewer.pal(3,"Dark2"),0.7), - ylab="Proportion of cells",las=2) - -![](pipeline_QCN_files/figure-markdown_strict/cell_cycle_annotation-1.png) - -Cyclone generates individual scores for each cell cycle phase. G1 and -G2/M are assigned based on these scores, and any cells not strongly -scoring for either phase are assigned to S phase. -Later in the pipeline we will also attempt to predict cell cycle using -the method from *Seurat*. - -Filter out low abundance genes ------------------------------- - -Noisy genes must be removed to prevent them from skewing normalization. -The filtering method in *Seurat* removes only genes detected in very few -cells, which is sufficient for normalization while removing as few genes -as possible. - - geneStats <- data.frame(DR=apply(counts(ebRawS),1,function(X) sum(X > 0)/length(X)), - MDTC=apply(counts(ebRawS),1,function(X) mean(X[X > 0])), - cellMax=apply(counts(ebRawS),1,max)) - drop_g <- geneStats$DR < 3/ncol(ebRawS) - ebRawSF <- ebRawS[!drop_g,] - - layout(matrix(c(2,1,0,3),2),widths=c(6,1),heights=c(1,6)) - par(mar=c(3,3,0,0),mgp=2:0) - temp_H <- cut(log10(geneStats[order(geneStats$cellMax,decreasing=F),"cellMax"]), - breaks=100,labels=F) - plot(log10(MDTC)~log10(DR),data=geneStats[order(geneStats$cellMax,decreasing=F),], - pch=21,col=viridis(100,0.5,1,0)[temp_H],bg=viridis(100,0.3,1,0)[temp_H], - xlab=expression(Log[10]~"Proportion of cells detecting gene"), - ylab=expression(Log[10]~"Mean transcript count of detected genes (MDTC)")) - points(log10(MDTC)~log10(DR),data=geneStats[drop_g,], - pch=4,col=alpha("red",0.5),cex=1.2) - legend("top",bty="n",pch=c(4,NA,NA),col=c("red",NA,NA),cex=1.1,inset=c(0,.06), - legend=c("Gene in < 3 cells", - paste("Genes removed:",sum(drop_g)), - paste("Genes remaining:",nrow(ebRawSF)))) - segments(x0=seq(quantile(range(log10(geneStats$DR)),.2), - quantile(range(log10(geneStats$DR)),.8),length.out=1000), - y0=rep(max(log10(geneStats$MDTC)) * 1.02), - y1=rep(max(log10(geneStats$MDTC))),col=viridis(1000)) - text(x=c(quantile(range(log10(geneStats$DR)),.2), - median(range(log10(geneStats$DR))), - quantile(range(log10(geneStats$DR)),.8)), - y=rep(max(log10(geneStats$MDTC)) * .98,3), - labels=c(min(geneStats$cellMax), - expression(Log[10]~bold(max)~transcript~count), - max(geneStats$cellMax)),cex=1.1) - - par(mar=c(0,3,0,0)) - hist(log10(geneStats$DR),freq=T,breaks=100,col="grey80",main=NULL,xaxt="n") - title("Gene expression distribution",line=-2,cex.main=1.5) - par(mar=c(3,0,0,0)) - barplot(hist(log10(geneStats$MDTC),breaks=100,plot=F)$counts, - horiz=T,space=0,col="grey80",main=NULL,xlab="Frequency") - -![](pipeline_QCN_files/figure-markdown_strict/geneFilt-1.png) - - rm(list=ls()[!ls() %in% c("ebRawSF","dataName","dataPath","plotHistHoriz","rainbow2")]) - gc() - - ## used (Mb) gc trigger (Mb) max used (Mb) - ## Ncells 5524835 295.1 8442490 450.9 8442490 450.9 - ## Vcells 14015625 107.0 60649465 462.8 82069840 626.2 - -Normalization -============= - -Next step is normalization. Marioni proposed a normalization technique -that attempts to generate cell-specific size factors that are robust to -differential expression between genes in a heterogenous sample, unlike -simple library-size normalization -(). -This method correlates strongly with library size normalization for -homogenous samples, but solves a series of linear equations to -deconvolute cell-specific size factors for normalization. In order to -better handle heterogenous data, they suggest separating the data by -simple heirarchical clustering of a Spearman correlation-based distance -metric so that they can normalize the separate subpopulations separately -to prevent the suppression of true differential expression during -normalization. - -Normalization is carried out by assigning size factors per gene by the -pooling and deconvolution method, then taking the log-ratio between each -count and its size factor, and adding a pseudocount of one. -Log-transforming the data stabilizes variance by reducing the impact of -a few highly variable genes. - -Following this, it is suggested to investigate sources of technical -variance, but without spike-ins or any annotated possible sources of -variation, this step is impossible with this data. - - if (!file.exists(paste0(dataPath,"ebNorm.RData"))) { - qClust <- quickCluster(ebRawSF,min.size=200) - ## Clustering of heterogenous data is suggested prior to normalization, - ## but if you have few cells or homogenous data, it may not be necessary. - names(qClust) <- colnames(ebRawSF) - ebRawSF <- computeSumFactors(ebRawSF,clusters=qClust) - ebN <- ebRawSF[,!sizeFactors(ebRawSF) <= 0] - ebN <- normalize(ebN) - naCells <- apply(exprs(ebN),2,function(X) any(is.na(X))) - if (any(naCells)) { - exprs(ebN)[,naCells] <- min(apply(exprs(ebN),1,function(X) min(X,na.rm = T))) - } - save(qClust,ebN,file=paste0(dataPath,"ebNorm.RData")) - } else { - load(paste0(dataPath,"ebNorm.RData")) - print(paste("Data loaded from",paste0(dataPath,"ebNorm.RData"))) - } - - ## [1] "Data loaded from ../scClustViz_files/demo_10Xneurons900/ebNorm.RData" - - geneStatsN <- data.frame( - DR=apply(exprs(ebN),1,function(X) sum(X > 0))/ncol(ebN), - MDTC=apply(exprs(ebN),1,function(X) mean(X[X > 0])), - MTC=Matrix::rowMeans(exprs(ebN)), - sumTC=Matrix::rowSums(exprs(ebN)), - cellMax=apply(exprs(ebN),1,max) - ) - - clustCols <- rainbow2(length(levels(qClust))) - temp_randcells <- sample.int(ncol(ebN)) - - temp_times <- as.factor(sub("_.+$","",colnames(ebRawSF))) - layout(matrix(c(2,1,0,3,5,4,0,6),2), - widths=c(3.5,.7,3.5,.7),heights=c(.7,3.5)) - - ## GeneDetect~LibSize by QuickCluster - par(mar=c(3,3,0,0),mgp=2:0) - plot(x=log10(colData(ebRawSF)$total_counts)[temp_randcells], - y=log10(colData(ebRawSF)$total_features)[temp_randcells], - pch=21,col=alpha(clustCols,0.4)[qClust][temp_randcells], - bg=alpha(clustCols,0.2)[qClust][temp_randcells], - xlab=expression(log[10]~"Library Size"),ylab=expression(log[10]~"Genes Detected")) - points(log10(colData(ebRawSF)$total_counts)[!colnames(ebRawSF) %in% colnames(ebN)], - log10(colData(ebRawSF)$total_features)[!colnames(ebRawSF) %in% colnames(ebN)], - pch=4,col="red") - mtext("Clusters for normalization",side=3,line=-1.5,font=2) - legend("topleft",bty="n",ncol=2,inset=c(0,.05),lwd=2,pch=21, - col=alpha(clustCols[seq_along(levels(qClust))],0.3), - pt.bg=alpha(clustCols[seq_along(levels(qClust))],0.1), - legend=paste0(levels(qClust),": ",table(qClust))) - legend("bottomright",bty="n",pch=4,col="red", - legend=paste(ncol(ebRawSF) - ncol(ebN),"cells could not be normalized")) - - par(mar=c(0,3,.1,0)) - temp_density <- tapply(log10(colData(ebRawSF)$total_counts),qClust, - function(X) density(X)) - plot(x=NULL,y=NULL,xlim=range(log10(colData(ebRawSF)$total_counts)), - ylim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - xlab=NULL,ylab="Frequency",xaxt="n") - for (x in seq_along(levels(qClust))) { - lines(temp_density[[x]],lwd=2,col=alpha(clustCols,0.7)[x]) - } - - par(mar=c(3,0,0,.1)) - temp_density <- tapply(log10(colData(ebRawSF)$total_features),qClust, - function(X) density(X)) - plot(x=NULL,y=NULL,ylim=range(log10(colData(ebRawSF)$total_features)), - xlim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - ylab=NULL,xlab="Frequency",yaxt="n") - for(x in seq_along(levels(qClust))) { - lines(x=temp_density[[x]]$y,y=temp_density[[x]]$x, - lwd=2,col=alpha(clustCols,0.7)[x]) - } - - ## SizeFactor~LibSize by QuickCluster - qClustF <- qClust[colnames(ebN)] - par(mar=c(3,3,0,0),mgp=2:0) - plot(x=log10(colData(ebN)$total_counts)[temp_randcells], - y=log10(sizeFactors(ebN))[temp_randcells], - pch=21,col=alpha(clustCols,0.4)[qClustF][temp_randcells], - bg=alpha(clustCols,0.2)[qClustF][temp_randcells], - xlab=expression(log[10]~"Library Size"),ylab=expression(log[10]~"Size Factor")) - mtext("Clusters for normalization",side=3,line=-1.5,font=2) - legend("bottomright",bty="n",ncol=2,inset=c(0,.03),lwd=2,pch=21, - col=alpha(clustCols[seq_along(levels(qClustF))],0.3), - pt.bg=alpha(clustCols[seq_along(levels(qClustF))],0.1), - legend=paste0(levels(qClustF),": ",table(qClustF))) - mtext(paste(ncol(ebRawSF) - ncol(ebN),"cells were not normalized"), - side=1,line=-1.1,adj=.99,cex=.8) - - par(mar=c(0,3,.1,0)) - temp_density <- tapply(log10(colData(ebN)$total_counts),qClustF, - function(X) density(X)) - plot(x=NULL,y=NULL,xlim=range(log10(colData(ebN)$total_counts)), - ylim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - xlab=NULL,ylab="Frequency",xaxt="n") - for (x in seq_along(levels(qClustF))) { - lines(temp_density[[x]],lwd=2,col=alpha(clustCols,0.7)[x]) - } - - par(mar=c(3,0,0,.1)) - temp_density <- tapply(log10(sizeFactors(ebN)),qClustF, - function(X) density(X)) - plot(x=NULL,y=NULL,ylim=range(log10(sizeFactors(ebN))), - xlim=range(unlist(lapply(temp_density,function(X) range(X$y)))), - ylab=NULL,xlab="Frequency",yaxt="n") - for(x in seq_along(levels(qClustF))) { - lines(x=temp_density[[x]]$y,y=temp_density[[x]]$x, - lwd=2,col=alpha(clustCols,0.7)[x]) - } - -![](pipeline_QCN_files/figure-markdown_strict/normalize_fig1-1.png) - -Cells that fail to normalize are generally due to poor information -content (small library size, weak gene expression relative to other -cells). - - rm(list=ls()[!ls() %in% c("ebN","dataPath")]) - gc() - - ## used (Mb) gc trigger (Mb) max used (Mb) - ## Ncells 5534101 295.6 8442490 450.9 8442490 450.9 - ## Vcells 18254065 139.3 58287486 444.7 82069840 626.2 - -Highly variable genes -===================== - -Identification of highly variable genes is done by assuming most -endogenous genes are not variably expressed, and fitting a curve to -these genes when comparing variance to mean. This curve is presumed to -represent technical variation, and thus highly variable genes are those -with variance significantly greater than this curve. The method used -here from *scran* isn’t that different in logic to the *Seurat* method, -but fitting a spline is just a little more refined way of going about it -than Seurat’s binning method. - - var.fit <- trendVar(exprs(ebN),method="loess",parametric=T) - var.out <- decomposeVar(exprs(ebN),var.fit) - bioCut <- 0 - bioCutFDR <- 1e-2 - hvg <- var.out[which(var.out$FDR <= bioCutFDR & var.out$bio >= bioCut),] - hvg <- hvg[order(hvg$bio,decreasing=T),] - - par(mar=c(3,3,3,1),mgp=2:0) - plot(total~mean, data=var.out[!rownames(var.out) %in% rownames(hvg),], - ylim=range(var.out$total),xlim=range(var.out$mean), - pch=21,col=alpha("black",0.3),bg=alpha("black",0.1), - xlab="Mean log-expression",ylab="Variance of log-expression") - points(total~mean, data=var.out[rownames(var.out) %in% rownames(hvg),], - pch=21,col=alpha("red",0.3),bg=alpha("red",0.1)) - lines(var.out$mean[order(var.out$mean)],var.out$tech[order(var.out$mean)], - col=alpha("red",0.5),lwd=2) - text(total~mean,data=var.out[rownames(hvg[1:10,]),], - labels=rownames(hvg[1:10,]),pos=4,col=alpha("red",0.5)) - legend("top",bty="n",inset=c(0,-.12),ncol=2,xpd=NA,lwd=c(2,NA,NA),pch=c(NA,21,NA), - col=alpha(c("red","red",NA),0.5),pt.bg=alpha(c(NA,"red",NA),0.3), - legend=c("Predicted technical variance", - paste("Biological variance > 0 at FDR <=",bioCutFDR), - paste(nrow(hvg),"/",nrow(ebN),"highly variable genes"))) - -![](pipeline_QCN_files/figure-markdown_strict/HVG2-1.png) - - ebNorm <- exprs(ebN) - pDat <- cbind( - colData(ebN)[,c("total_features","total_counts")], - colData(ebN)[,which(colnames(colData(ebN)) == "mitoPct"):ncol(colData(ebN))] - ) - save(ebNorm,pDat,hvg,file=paste0(dataPath,"clustInputs.RData")) - - ## Writing the normalized matrix as a .csv in case you want to use it for other things - ## (ie. loading in Python) - write.csv(as.matrix(exprs(ebN)),file=paste0(dataPath,"ebNorm.csv"),quote=F) diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN.pdf b/ToBeConvertedToPkg/pipeline/pipeline_QCN.pdf deleted file mode 100644 index dadba13..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN.pdf and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/HVG2-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/HVG2-1.png deleted file mode 100644 index d934e54..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/HVG2-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/cell_cycle_annotation-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/cell_cycle_annotation-1.png deleted file mode 100644 index c323e27..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/cell_cycle_annotation-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/doubFilt-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/doubFilt-1.png deleted file mode 100644 index dcaa83f..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/doubFilt-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/geneFilt-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/geneFilt-1.png deleted file mode 100644 index b4827fe..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/geneFilt-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/mitoFilt-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/mitoFilt-1.png deleted file mode 100644 index 2f04b90..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/mitoFilt-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/normalize_fig1-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/normalize_fig1-1.png deleted file mode 100644 index 7969ccf..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/normalize_fig1-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/plot_knee2-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/plot_knee2-1.png deleted file mode 100644 index 7838c64..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/plot_knee2-1.png and /dev/null differ diff --git a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/what_are_those_cells-1.png b/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/what_are_those_cells-1.png deleted file mode 100644 index 1a52c7e..0000000 Binary files a/ToBeConvertedToPkg/pipeline/pipeline_QCN_files/figure-markdown_strict/what_are_those_cells-1.png and /dev/null differ diff --git a/man/clusterWiseDEtest.Rd b/man/clusterWiseDEtest.Rd index 4bd382c..d262c78 100644 --- a/man/clusterWiseDEtest.Rd +++ b/man/clusterWiseDEtest.Rd @@ -79,7 +79,7 @@ expression testing is done using the Wilcoxon rank-sum test implemented in the b file="for_scClustViz.RData") # Save these objects so you'll never have to run this slow function again! - runShiny(filePath="for_scClustViz.RData",annotationDB=org.Mm.eg.db) + runShiny(filePath="for_scClustViz.RData") } } diff --git a/man/readFromSeurat.Rd b/man/readFromSeurat.Rd index 442e8a9..76a540b 100644 --- a/man/readFromSeurat.Rd +++ b/man/readFromSeurat.Rd @@ -52,9 +52,18 @@ differential expression testing function, as well as in the Shiny app itself. \examples{ \dontrun{ - data_for_scClustViz <- readFromSeurat(your_seurat_object) - rm(your_seurat_object) + data_for_scClustViz <- readFromSeurat(your_seurat_object, + convertGeneIDs=F) + rm(your_seurat_object) # All the data scClustViz needs is in 'data_for_scClustViz'. + + DE_for_scClustViz <- clusterWiseDEtest(data_for_scClustViz) + + save(data_for_scClustViz,DE_for_scClustViz, + file="for_scClustViz.RData") + # Save these objects so you'll never have to run this slow function again! + + runShiny(filePath="for_scClustViz.RData") } } diff --git a/man/runShiny.Rd b/man/runShiny.Rd index 2545685..25f66c3 100644 --- a/man/runShiny.Rd +++ b/man/runShiny.Rd @@ -4,90 +4,107 @@ \alias{runShiny} \title{Run the scClustViz Shiny app} \usage{ -runShiny(filePath, cellMarkers = list(), annotationDB, rownameKeytype, - exponent = 2, pseudocount = 1, FDRthresh = 0.01, +runShiny(filePath, outPath, cellMarkers = list(), annotationDB, + rownameKeytype, exponent = 2, pseudocount = 1, FDRthresh = 0.01, threshType = "dDR", dDRthresh = 0.15, logGERthresh = 1) } \arguments{ -\item{filePath}{A character vector giving the relative filepath to an RData file -containing two objects. One must be the list outputted by one of the importData -functions (either \code{\link{readFromSeurat}} or \code{\link{readFromManual}}) -containing the data for viewing in the app. The other must be the list outputted by -the \code{\link{clusterWiseDEtest}} function containing differential gene expression -results for viewing in the app. As long as none of the name of the list elements have -been changed, the objects can be named anything you'd like. Note that any files -generated by the Shiny app (ie. saving the selected cluster solution, saving custom -set DE testing results) will be saved/loaded in the same directory as the input file.} +\item{filePath}{A character vector giving the relative filepath to an RData +file containing two objects. One must be the list outputted by one of the +importData functions (either \code{\link{readFromSeurat}} or +\code{\link{readFromManual}}) containing the data for viewing in the app. +The other must be the list outputted by the \code{\link{clusterWiseDEtest}} +function containing differential gene expression results for viewing in the +app. As long as none of the name of the list elements have been changed, +the objects can be named anything you'd like. Note that any files generated +by the Shiny app (ie. saving the selected cluster solution, saving custom +set DE testing results) will be saved/loaded in the same directory as the +input file.} -\item{cellMarkers}{Optional. If you have canonical marker genes for expected cell -types, list them here (see example code below). Note that the gene names must match -rownames of your data (ie. use ensembl IDs if your gene expression matrix rownames -are ensembl IDs). The Shiny app will attempt to label clusters in the tSNE projection -by highest median gene expression.} +\item{outPath}{Optional. If you'd like to save/load any analysis files +to/from a different directory than the input directory (for example, if +you're using data from a package), specify that directory here.} -\item{annotationDB}{Optional. If the gene IDs in your data aren't official gene -symbols, you may want to pass an AnnotationDbi object (ie. org.Mm.eg.db / -org.Hs.eg.db for mouse / human respectively) as this argument. This will allow the -Shiny interface to both display gene symbols when highlighting genes in figures, and -search by gene symbol.} +\item{cellMarkers}{Optional. If you have canonical marker genes for expected +cell types, list them here (see example code below). Note that the gene +names must match rownames of your data (ie. use ensembl IDs if your gene +expression matrix rownames are ensembl IDs). The Shiny app will attempt to +label clusters in the tSNE projection by highest median gene expression.} -\item{rownameKeytype}{Optional. If passing the annotationDB argument, it helps if you -indicate what keytype (see \code{AnnotationDbi::keytypes(annotationDB)}) the rownames -of your data are. If this is missing, it will be determined automatically, but that -will take about 30s.} +\item{annotationDB}{Optional. An AnnotationDbi object for your data's species +(ie. org.Mm.eg.db / org.Hs.eg.db for mouse / human respectively). If +present, gene names will be shown in gene-specific figures, official gene +symbols (instead of your rownames) will be displayed in figures, and gene +searches performed using both official gene symbols and your rownames. If +the gene IDs in your data aren't official gene symbols, using this argument +will make the visualization tool much more useful.} -\item{exponent}{The log base of your normalized input data. Seurat normalization uses -the natural log (set this to exp(1)), while other normalization methods generally use -log2 (set this to 2). This is used if you use the function for testing differential -gene expression between custom sets, and should be set to the same parameters as in -\code{clusterWiseDEtest}.} - -\item{pseudocount}{The pseudocount added to all log-normalized values in your input -data. Most methods use a pseudocount of 1 to eliminate log(0) errors. This is used if -you use the function for testing differential gene expression between custom sets, -and should be set to the same parameters as in \code{clusterWiseDEtest}.} +\item{rownameKeytype}{Optional. A character vector indicating the +AnnotationDbi keytype (see \code{AnnotationDbi::keytypes(annotationDB)}) +that represents your rownames. If the annotationDB argument is present and +this is missing, the function will assume the rownames are official gene +symbols. If less than 80% of rownames map to official gene symbols, the +function will try to predict the appropriate keytype of the rownames (this +takes a bit of time).} -\item{FDRthresh}{The false discovery rate to use as a threshold for determining -statistical significance of differential expression calculated by the Wilcoxon -rank-sum test. This is used if you use the function for testing differential gene -expression between custom sets, and should be set to the same parameters as in +\item{exponent}{Default = 2. The log base of your normalized input data. +Seurat normalization uses the natural log (set this to exp(1)), while other +normalization methods generally use log2 (set this to 2). This is used if +you use the function for testing differential gene expression between +custom sets, and should be set to the same parameters as in \code{clusterWiseDEtest}.} -\item{threshType}{Filtering genes for use in differential expression testing can be -done multiple ways. We use an expression ratio filter for comparing each cluster to -the rest of the tissue as a whole, but find that difference in detection rates works -better when comparing clusters to each other. You can set threshType to -\code{"logGER"} to use a gene expression ratio for all gene filtering, or leave it as -default (\code{"dDR"}) to use difference in detection rate as the thresholding method -when comparing clusters to each other. This is used if you use the function for -testing differential gene expression between custom sets, and should be set to the +\item{pseudocount}{Default = 1. The pseudocount added to all log-normalized +values in your input data. Most methods use a pseudocount of 1 to eliminate +log(0) errors. This is used if you use the function for testing +differential gene expression between custom sets, and should be set to the same parameters as in \code{clusterWiseDEtest}.} -\item{dDRthresh}{Magnitude of detection rate difference of a gene between clusters to -use as filter for determining which genes to test for differential expression between -clusters. This is used if you use the function for testing differential gene -expression between custom sets, and should be set to the same parameters as in +\item{FDRthresh}{Default = 0.01. The false discovery rate to use as a +threshold for determining statistical significance of differential +expression calculated by the Wilcoxon rank-sum test. This is used if you +use the function for testing differential gene expression between custom +sets, and should be set to the same parameters as in \code{clusterWiseDEtest}.} -\item{logGERthresh}{Magnitude of gene expression ratio for a gene between clusters to -use as filter for determining which genes to test for differential expression between -clusters. This is used if you use the function for testing differential gene -expression between custom sets, and should be set to the same parameters as in -\code{clusterWiseDEtest}.} +\item{threshType}{Default = "dDR". Filtering genes for use in differential +expression testing can be done multiple ways. We use an expression ratio +filter for comparing each cluster to the rest of the tissue as a whole, but +find that difference in detection rates works better when comparing +clusters to each other. You can set threshType to \code{"logGER"} to use a +gene expression ratio for all gene filtering, or leave it as default +(\code{"dDR"}) to use difference in detection rate as the thresholding +method when comparing clusters to each other. This is used if you use the +function for testing differential gene expression between custom sets, and +should be set to the same parameters as in \code{clusterWiseDEtest}.} + +\item{dDRthresh}{Default = 0.15. Magnitude of detection rate difference of a +gene between clusters to use as filter for determining which genes to test +for differential expression between clusters. This is used if you use the +function for testing differential gene expression between custom sets, and +should be set to the same parameters as in \code{clusterWiseDEtest}.} + +\item{logGERthresh}{Default = 1. Magnitude of gene expression ratio for a +gene between clusters to use as filter for determining which genes to test +for differential expression between clusters. This is used if you use the +function for testing differential gene expression between custom sets, and +should be set to the same parameters as in \code{clusterWiseDEtest}.} } \value{ -The function causes the scClustViz Shiny GUI app to open in a seperate window. +The function causes the scClustViz Shiny GUI app to open in a + seperate window. } \description{ -Performs differential expression testing between clusters for all cluster solutions in -order to assess the biological relevance of each cluster solution. Differential -expression testing is done using the Wilcoxon rank-sum test implemented in the base R -\code{stats} package. For details about what is being compared in the tests, see the -"Value" section. +Performs differential expression testing between clusters for all cluster +solutions in order to assess the biological relevance of each cluster +solution. Differential expression testing is done using the Wilcoxon rank-sum +test implemented in the base R \code{stats} package. For details about what +is being compared in the tests, see the "Value" section. } \examples{ \dontrun{ - data_for_scClustViz <- readFromSeurat(your_seurat_object) + data_for_scClustViz <- readFromSeurat(your_seurat_object, + convertGeneIDs=F) rm(your_seurat_object) # All the data scClustViz needs is in 'data_for_scClustViz'. @@ -97,19 +114,32 @@ expression testing is done using the Wilcoxon rank-sum test implemented in the b file="for_scClustViz.RData") # Save these objects so you'll never have to run this slow function again! - runShiny(filePath="for_scClustViz.RData", - cellMarkers=list( - "Cortical precursors"=c("Mki67","Sox2","Pax6","Pcna", - "Nes","Cux1","Cux2"), - "Interneurons"=c("Gad1","Gad2","Npy","Sst", - "Lhx6","Tubb3","Rbfox3","Dcx"), - "Cajal-Retzius neurons"="Reln", - "Intermediate progenitors"="Eomes", - "Projection neurons"=c("Tbr1","Satb2","Fezf2","Bcl11b", - "Tle4","Nes","Cux1","Cux2", - "Tubb3","Rbfox3","Dcx") + runShiny(filePath="for_scClustViz.RData") + + ### Using example data from the MouseCortex package ### + devtools::install_github("BaderLab/MouseCortex") + library(org.Mm.eg.db) + runShiny(system.file("e13cortical_forViz.RData",package="MouseCortex"), + # Load input file (E13.5 data) from package directory. + outPath=".", + # Save any further analysis performed in the app to the + # working directory rather than library directory. + annotationDB="org.Mm.eg.db", + # This is an optional argument, but will add annotations. + cellMarkers=list("Cortical precursors"=c("Mki67","Sox2","Pax6", + "Pcna","Nes","Cux1","Cux2"), + "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6", + "Tubb3","Rbfox3","Dcx"), + "Cajal-Retzius neurons"="Reln", + "Intermediate progenitors"="Eomes", + "Projection neurons"=c("Tbr1","Satb2","Fezf2", + "Bcl11b","Tle4","Nes", + "Cux1","Cux2","Tubb3", + "Rbfox3","Dcx") + ) + # This is a list of canonical marker genes per expected cell type. + # The app uses this list to automatically annotate clusters. ) - ) } }