From 3dad59d824c3cb810f96c45dbe6773f2f8094d65 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 28 Feb 2024 17:16:59 -0800
Subject: [PATCH 001/166] add arrow support and ability to read zarr

---
 R/convenience.R   |  26 ++++--
 R/preprocessing.R | 214 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 196 insertions(+), 44 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 5c9367eca..c4cbed9f6 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -193,20 +193,34 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
     "centroids" = CreateCentroids(data$centroids),
     "segmentation" = CreateSegmentation(data$segmentations)
   )
+  
   coords <- CreateFOV(
     coords = segmentations.data,
     type = c("segmentation", "centroids"),
     molecules = data$microns,
     assay = assay
   )
+  
+  slot.map <- c(
+    `Blank Codeword` = 'BlankCodeword',
+    `Unassigned Codeword` = 'BlankCodeword',
+    `Negative Control Codeword` = 'ControlCodeword',
+    `Negative Control Probe` = 'ControlProbe'
+  )
 
   xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
-  if("Blank Codeword" %in% names(data$matrix))
-    xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]])
-  else
-    xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Unassigned Codeword"]])
-  xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]])
-  xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]])
+  
+  if(!is.null(data$metadata)) {
+    Misc(xenium.obj, 'run_metadata') <- data$metadata
+  }
+  
+  if(!is.null(data$segmentation_method)) {
+    xenium.obj <- AddMetaData(xenium.obj, data$segmentation_method)
+  }
+  
+  for(name in intersect(names(slot.map), names(data$matrix))) {
+    xenium.obj[[slot.map[name]]] <- CreateAssayObject(counts = data$matrix[[name]])
+  }
 
   xenium.obj[[fov]] <- coords
   return(xenium.obj)
diff --git a/R/preprocessing.R b/R/preprocessing.R
index 6f5857d0a..58561fca9 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2102,6 +2102,7 @@ ReadNanostring <- function(
 #' \itemize{
 #'  \item \dQuote{matrix}: the counts matrix
 #'  \item \dQuote{microns}: molecule coordinates
+#'  \item \dQuote{segmentation_method}: the segmentation method used for each cell (read from \code{cells.zarr.zip}, when present)
 #' }
 #' @param type Type of cell spatial coordinate matrices to read; choose one
 #' or more of:
@@ -2131,26 +2132,28 @@ ReadNanostring <- function(
 #'
 ReadXenium <- function(
   data.dir,
-  outs = c("matrix", "microns"),
+  outs = c("segmentation_method", "matrix", "microns"),
   type = "centroids",
   mols.qv.threshold = 20
 ) {
   # Argument checking
   type <- match.arg(
     arg = type,
-    choices = c("centroids", "segmentations"),
+    choices = c("centroids", "segmentations", "nucleus_segmentations"),
     several.ok = TRUE
   )
 
   outs <- match.arg(
     arg = outs,
-    choices = c("matrix", "microns"),
+    choices = c("segmentation_method", "matrix", "microns"),
     several.ok = TRUE
   )
 
   outs <- c(outs, type)
 
   has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)
+  has_arrow <- requireNamespace("arrow", quietly = TRUE)
+  has_hdf5r <- requireNamespace("hdf5r", quietly = TRUE)
 
   data <- sapply(outs, function(otype) {
     switch(
@@ -2158,10 +2161,65 @@ ReadXenium <- function(
       'matrix' = {
         pmtx <- progressor()
         pmtx(message = 'Reading counts matrix', class = 'sticky', amount = 0)
-        matrix <- suppressWarnings(Read10X(data.dir = file.path(data.dir, "cell_feature_matrix/")))
+        
+        for(option in Filter(function(x) x$req, list(
+          list(filename = "cell_feature_matrix.h5", fn = Read10X_h5, req = has_hdf5r),
+          list(filename = "cell_feature_matrix", fn = Read10X, req = TRUE)
+        ))) {
+          matrix <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(matrix, "try-error")) { break }
+        }
+        
         pmtx(type = "finish")
         matrix
       },
+      'segmentation_method' = {
+        if(!requireNamespace("stars", quietly = TRUE) || !requireNamespace("jsonlite", quietly = TRUE) || !requireNamespace("gmp", quietly = TRUE)) {
+          warning("Reading segmentation_method requires the `stars`, `gmp` and `jsonlite` packages")
+          return(NULL)
+        }
+        
+        if(file.exists(file.path(data.dir, "cells.zarr.zip"))) {
+          pcents <- progressor()
+          pcents(
+            message = 'Loading cell metadata',
+            class = 'sticky',
+            amount = 0
+          )
+          
+          tempdir <- path.expand(tempdir())
+          unzip(file.path(data.dir, "cells.zarr.zip"), exdir = tempdir)
+          zattr <- jsonlite::read_json(file.path(tempdir, '.zattrs'))
+          which_entry <- which(unlist(zattr$polygon_set_names) == 'cell')
+          
+          indices <- stars::read_mdim(file.path(tempdir, 'polygon_sets', which_entry - 1, 'cell_index'))$cell_index + 1
+          indices[is.na(indices)] <- 1
+          
+          ids <- stars::read_mdim(file.path(tempdir, 'cell_id'))$cell_id
+          ids[is.na(ids)] <- 0
+          
+          ids <- paste0(
+            gsub(' ', 'a', sprintf('%8s', sapply(
+              strsplit(as.character(gmp::as.bigz(ids[1,]), 16), ''),
+              function(id) {
+                rawToChar(as.raw(sapply(id, function(x) {
+                  as.numeric(charToRaw(x)) +
+                    ifelse(is.na(suppressWarnings(as.numeric(x))), 10, 49)
+                })))
+              }
+            ))), '-', ids[2,])
+          
+          method <- stars::read_mdim(file.path(tempdir, 'polygon_sets', which_entry - 1, 'method'))$method + 1
+          method[is.na(method)] <- 1
+          
+          segmentation_method <- unlist(zattr$segmentation_methods)[method]
+          
+          pcents(type = "finish")
+          data.frame(segmentation_method = segmentation_method, row.names = ids)
+        } else {
+          NULL
+        }
+      },
       'centroids' = {
         pcents <- progressor()
         pcents(
@@ -2169,19 +2227,32 @@ ReadXenium <- function(
           class = 'sticky',
           amount = 0
         )
-        if (has_dt) {
-          cell_info <- as.data.frame(data.table::fread(file.path(data.dir, "cells.csv.gz")))
-        } else {
-          cell_info <- read.csv(file.path(data.dir, "cells.csv.gz"))
+        
+        col.use <- c(x_centroid = 'x', y_centroid = 'y', cell_id = 'cell')
+        
+        for(option in Filter(function(x) x$req, list(
+          list(
+            filename = "cells.parquet",
+            fn = function(x) as.data.frame(arrow::read_parquet(x, col_select = names(col.use))),
+            req = has_arrow
+          ),
+          list(
+            filename = "cells.csv.gz",
+            fn = function(x) data.table::fread(x, data.table = FALSE, stringsAsFactors = FALSE, select = names(col.use)),
+            req = has_dt
+          ),
+          list(filename = "cells.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
+        ))) {
+          cell_info <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(cell_info, "try-error")) { break }
         }
-        cell_centroid_df <- data.frame(
-          x = cell_info$x_centroid,
-          y = cell_info$y_centroid,
-          cell = cell_info$cell_id,
-          stringsAsFactors = FALSE
-        )
+        
+        cell_info <- cell_info[, names(col.use)]
+        colnames(cell_info) <- col.use
+        
         pcents(type = 'finish')
-        cell_centroid_df
+        
+        cell_info
       },
       'segmentations' = {
         psegs <- progressor()
@@ -2190,17 +2261,61 @@ ReadXenium <- function(
           class = 'sticky',
           amount = 0
         )
-
-        # load cell boundaries
-        if (has_dt) {
-          cell_boundaries_df <- as.data.frame(data.table::fread(file.path(data.dir, "cell_boundaries.csv.gz")))
-        } else {
-          cell_boundaries_df <- read.csv(file.path(data.dir, "cell_boundaries.csv.gz"), stringsAsFactors = FALSE)
+        
+        for(option in Filter(function(x) x$req, list(
+          list(
+            filename = "cell_boundaries.parquet",
+            fn = function(x) as.data.frame(arrow::read_parquet(x)),
+            req = has_arrow
+          ),
+          list(
+            filename = "cell_boundaries.csv.gz",
+            fn = function(x) data.table::fread(x, data.table = FALSE, stringsAsFactors = FALSE),
+            req = has_dt
+          ),
+          list(filename = "cell_boundaries.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
+        ))) {
+          cell_boundaries_df <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(cell_boundaries_df, "try-error")) { break }
         }
-        names(cell_boundaries_df) <- c("cell", "x", "y")
+        
+        colnames(cell_boundaries_df) <- c('cell', 'x', 'y')
+        
         psegs(type = "finish")
+        
         cell_boundaries_df
       },
+      'nucleus_segmentations' = {
+        psegs <- progressor()
+        psegs(
+          message = 'Loading nucleus segmentations',
+          class = 'sticky',
+          amount = 0
+        )
+        
+        for(option in Filter(function(x) x$req, list(
+          list(
+            filename = "nucleus_boundaries.parquet",
+            fn = function(x) as.data.frame(arrow::read_parquet(x)),
+            req = has_arrow
+          ),
+          list(
+            filename = "nucleus_boundaries.csv.gz",
+            fn = function(x) data.table::fread(x, data.table = FALSE, stringsAsFactors = FALSE),
+            req = has_dt
+          ),
+          list(filename = "nucleus_boundaries.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
+        ))) {
+          nucleus_boundaries_df <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(nucleus_boundaries_df, "try-error")) { break }
+        }
+        
+        colnames(nucleus_boundaries_df) <- c('cell', 'x', 'y')
+        
+        psegs(type = "finish")
+        
+        nucleus_boundaries_df
+      },
       'microns' = {
         pmicrons <- progressor()
         pmicrons(
@@ -2209,28 +2324,51 @@ ReadXenium <- function(
           amount = 0
         )
 
-        # molecules
-        if (has_dt) {
-          tx_dt <- as.data.frame(data.table::fread(file.path(data.dir, "transcripts.csv.gz")))
-          transcripts <- subset(tx_dt, qv >= mols.qv.threshold)
-        } else {
-          transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz"))
-          transcripts <- subset(transcripts, qv >= mols.qv.threshold)
+        col.use = c(x_location = 'x', y_location = 'y', feature_name = 'gene')
+        
+        for(option in Filter(function(x) x$req, list(
+          list(
+            filename = "transcripts.parquet",
+            fn = function(x) as.data.frame(arrow::read_parquet(x, col_select = names(col.use))),
+            req = has_arrow
+          ),
+          list(
+            filename = "transcripts.csv.gz",
+            fn = function(x) data.table::fread(x, data.table = FALSE, select = names(col.use), stringsAsFactors = FALSE),
+            req = has_dt
+          ),
+          list(filename = "transcripts.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
+        ))) {
+          transcripts <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(transcripts, "try-error")) { break }
         }
-
-        df <-
-          data.frame(
-            x = transcripts$x_location,
-            y = transcripts$y_location,
-            gene = transcripts$feature_name,
-            stringsAsFactors = FALSE
-          )
+        
+        transcripts <- transcripts[, names(col.use)]
+        colnames(transcripts) <- col.use
+        
         pmicrons(type = 'finish')
-        df
+        
+        transcripts
       },
       stop("Unknown Xenium input type: ", otype)
     )
   }, USE.NAMES = TRUE)
+  
+  metadata <- file.path(data.dir, "experiment.xenium")
+  if(file.exists(metadata) && requireNamespace("jsonlite", quietly = TRUE)) {
+    meta <- jsonlite::read_json(metadata)
+    data$metadata <- meta[
+      intersect(
+        names(meta),
+        c(
+          'run_name', 'run_start_time', 'region_name',
+          'preservation_method', 'panel_name', 'panel_organism',
+          'panel_tissue_type', 'instrument_sw_version',
+          'segmentation_stain'
+        )
+      )
+    ]
+  }
   return(data)
 }
 

From b3a4c010ac10c0eec93233986431c6f4b97868cf Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 28 Feb 2024 17:24:10 -0800
Subject: [PATCH 002/166] wwip

---
 vignettes/seurat5_spatial_vignette_2.Rmd | 113 ++++++++++++++++-------
 1 file changed, 79 insertions(+), 34 deletions(-)

diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd
index f3e765f21..88395262f 100644
--- a/vignettes/seurat5_spatial_vignette_2.Rmd
+++ b/vignettes/seurat5_spatial_vignette_2.Rmd
@@ -6,7 +6,8 @@ output:
     df_print: kable
 date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`'
 ---
-***
+
+------------------------------------------------------------------------
 
 ```{r setup, include=FALSE}
 all_times <- list()  # store the time for each chunk
@@ -37,9 +38,9 @@ In this vignette, we introduce a Seurat extension to analyze new types of spatia
 
 We update the Seurat infrastructure to enable the analysis, visualization, and exploration of these exciting datasets. In this vignette, we focus on three datasets produced by different multiplexed imaging technologies, each of which is publicly available. We will be adding support for additional imaging-based technologies in the coming months.
 
-* Vizgen MERSCOPE (Mouse Brain)
-* Nanostring CosMx Spatial Molecular Imager (FFPE Human Lung)
-* Akoya CODEX (Human Lymph Node)
+-   Vizgen MERSCOPE (Mouse Brain)
+-   Nanostring CosMx Spatial Molecular Imager (FFPE Human Lung)
+-   Akoya CODEX (Human Lymph Node)
 
 First, we load the packages necessary for this vignette.
 
@@ -54,11 +55,11 @@ library(ggplot2)
 
 This dataset was produced using the Vizgen MERSCOPE system, which utilizes the MERFISH technology. The total dataset is available for [public download](https://info.vizgen.com/mouse-brain-data), and contains nine samples (three full coronal slices of the mouse brain, with three biological replicates per slice). The gene panel consists of 483 gene targets, representing known anonical cell type markers, nonsensory G-Protein coupled receptors (GPCRs), and Receptor Tyrosine Kinases (RTKs). In this vignette, we analyze one of the samples - slice 2, replicate 1. The median number of transcripts detected in each cell is 206.
 
-First, we read in the dataset and create a Seurat object. 
+First, we read in the dataset and create a Seurat object.
 
-We use the `LoadVizgen()` function, which we have written to read in the output  of the Vizgen analysis pipeline. The resulting Seurat object contains the following information:
+We use the `LoadVizgen()` function, which we have written to read in the output of the Vizgen analysis pipeline. The resulting Seurat object contains the following information:
 
-* A count matrix, indicating the number of observed molecules for each of the 483 transcripts in each cell. This matrix is analogous to a count matrix in scRNA-seq, and is stored by default in the RNA assay of the Seurat object
+-   A count matrix, indicating the number of observed molecules for each of the 483 transcripts in each cell. This matrix is analogous to a count matrix in scRNA-seq, and is stored by default in the RNA assay of the Seurat object
 
 ```{r, message=FALSE, warning=FALSE}
 # Loading segmentations is a slow process and multi processing with the future pacakge is recommended
@@ -68,32 +69,42 @@ vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/vignette_data/vizgen/s2r1/
 The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object:
 
 <details>
-  <summary>**Cell Centroids: The spatial coordinates marking the centroid for each cell being profiled**</summary>
+
+<summary>**Cell Centroids: The spatial coordinates marking the centroid for each cell being profiled**</summary>
 
 ```{r}
 # Get the center position of each centroid. There is one row per cell in this dataframe.
 head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["centroids"]]))
 ```
+
 </details>
+
 <details>
-  <summary>**Cell Segmentation Boundaries: The spatial coordinates that describe the polygon segmentation of each single cell**</summary>
+
+<summary>**Cell Segmentation Boundaries: The spatial coordinates that describe the polygon segmentation of each single cell**</summary>
 
 ```{r}
 # Get the coordinates for each segmentation vertice. Each cell will have a variable number of vertices describing its shape.
 head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["segmentation"]]))
 ```
+
 </details>
+
 <details>
-  <summary>**Molecule positions: The spatial coordinates for each individual molecule that was detected during the multiplexed smFISH experiment.**</summary>
+
+<summary>**Molecule positions: The spatial coordinates for each individual molecule that was detected during the multiplexed smFISH experiment.**</summary>
 
 ```{r}
 # Fetch molecules positions for Chrm1
 head(FetchData(vizgen.obj[["s2r1"]][["molecules"]], vars="Chrm1"))
 ```
+
 </details>
+
 \
 
 ## Preprocessing and unsupervised analysis
+
 We start by performing a standard unsupervised clustering analysis, essentially first treating the dataset as an scRNA-seq experiment. We use SCTransform-based normalization, though we slightly modify the default clipping parameters to mitigate the effect of outliers that we occasionally observe in smFISH experiments. After normalization, we can run dimensional reduction and clustering.
 
 ```{r analysis, results='hide'}
@@ -117,18 +128,20 @@ ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "polychrome", axes = TRUE)
 You can also customize multiple aspect of the plot, including the color scheme, cell border widths, and size (see below).
 
 <details>
-  <summary>**Customizing spatial plots in Seurat**</summary>
+
+<summary>**Customizing spatial plots in Seurat**</summary>
 
 The `ImageDimPlot()` and `ImageFeaturePlot()` functions have a few parameters which you can customize individual visualizations. These include:
 
-* alpha: Ranges from 0 to 1. Sets the transparency of within-cell coloring.
-* size: determines the size of points representing cells, if centroids are being plotted
-* cols: Sets the color scheme for the internal shading of each cell. Examples settings are `polychrome`, `glasbey`, `Paired`, `Set3`, and `parade`. Default is the ggplot2 color palette
-* shuffle.cols: In some cases the selection of `cols` is more effective when the same colors are assigned to different clusters. Set `shuffle.cols = TRUE` to randomly shuffle the colors in the palette.
-* border.size: Sets the width of the cell segmentation borders. By default, segmentations are plotted with a border size of 0.3 and centroids are plotted without border.
-* border.color: Sets the color of the cell segmentation borders
-* dark.background: Sets a black background color (TRUE by default)
-* axes: Display
+-   alpha: Ranges from 0 to 1. Sets the transparency of within-cell coloring.
+-   size: determines the size of points representing cells, if centroids are being plotted
+-   cols: Sets the color scheme for the internal shading of each cell. Examples settings are `polychrome`, `glasbey`, `Paired`, `Set3`, and `parade`. Default is the ggplot2 color palette
+-   shuffle.cols: In some cases the selection of `cols` is more effective when the same colors are assigned to different clusters. Set `shuffle.cols = TRUE` to randomly shuffle the colors in the palette.
+-   border.size: Sets the width of the cell segmentation borders. By default, segmentations are plotted with a border size of 0.3 and centroids are plotted without border.
+-   border.color: Sets the color of the cell segmentation borders
+-   dark.background: Sets a black background color (TRUE by default)
+-   axes: Display
+
 </details>
 
 Since it can be difficult to visualize the spatial localization patterns of an individual cluster when viewing them all together, we can highlight all cells that belong to a particular cluster:
@@ -139,7 +152,7 @@ p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vi
 p1 + p2
 ```
 
-We can find markers of individual clusters and visualize their spatial expression pattern. We can color cells based on their quantified expression of an individual gene, using `ImageFeaturePlot()`, which is analagous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. Since MERFISH images individual molecules, we can also visualize the location of individual *molecules*. 
+We can find markers of individual clusters and visualize their spatial expression pattern. We can color cells based on their quantified expression of an individual gene, using `ImageFeaturePlot()`, which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. Since MERFISH images individual molecules, we can also visualize the location of individual *molecules*.
 
 ```{r, fig.height=7, fig.width=12}
 p1 <- ImageFeaturePlot(vizgen.obj, features = "Slc17a7")
@@ -160,7 +173,7 @@ p1 + p2
 
 The updated Seurat spatial framework has the option to treat cells as individual points, or also to visualize cell boundaries (segmentations). By default, Seurat ignores cell segmentations and treats each cell as a point ('centroids'). This speeds up plotting, especially when looking at large areas, where cell boundaries are too small to visualize.
 
-We can zoom into a region of tissue, creating a new field of view. For example, we can zoom into a region that contains the hippocampus. Once zoomed-in, we can set `DefaultBoundary()` to show cell segmentations. You can also 'simplify' the cell segmentations, reducing the number of edges in each polygon to  speed up plotting.
+We can zoom into a region of tissue, creating a new field of view. For example, we can zoom into a region that contains the hippocampus. Once zoomed-in, we can set `DefaultBoundary()` to show cell segmentations. You can also 'simplify' the cell segmentations, reducing the number of edges in each polygon to speed up plotting.
 
 ```{r, fig.height=5, fig.width=14}
 # create a Crop
@@ -187,18 +200,41 @@ p1 + p2 + p3
 ```
 
 <details>
-  <summary>**What is the tol parameter?**</summary>
 
-The tol parameter determines how simplified the resulting segmentations are. A higher value of tol will reduce the number of vertices more drastically which will speed up plotting, but some segmentation detail will be lost. See https://rgeos.r-forge.r-project.org/reference/topo-unary-gSimplify.html for examples using different values for tol.
+<summary>**What is the tol parameter?**</summary>
+
+The tol parameter determines how simplified the resulting segmentations are. A higher value of tol will reduce the number of vertices more drastically which will speed up plotting, but some segmentation detail will be lost. See <https://rgeos.r-forge.r-project.org/reference/topo-unary-gSimplify.html> for examples using different values for tol.
 
 </details>
 
 We can visualize individual molecules plotted at higher resolution after zooming-in
+
 ```{r, fig.height=8, fig.width=8}
 # Since there is nothing behind the segmentations, alpha will slightly mute colors
 ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green"))
 ```
 
+# Human Lung: 10x Genomics Xenium In Situ
+
+This dataset is a preview of the Xenium multimodal cell segmentation solution using a development version of the assay user guide and analysis software. It uses the [Xenium Human Multi-Tissue and Cancer Panel](https://www.10xgenomics.com/support/in-situ-gene-expression/documentation/steps/panel-design/pre-designed-xenium-gene-expression-panels) (377 genes) which was pre-designed by 10x Genomics. In this vignette, we will demonstrate how to load Xenium data for analysis and visualization using Seurat and, in particular, how to parse and visualize cell metadata. Note that this vignette requires the use of some optional dependencies in Seurat, namely `stars`, `jsonlite` and `gmp` in order to read data from `.zarr` files.
+
+This uses the full Xenium output bundle available from the [FFPE Human Lung Cancer with Xenium Multimodal Cell Segmentation Preview Data](https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard) page, which can be downloaded as described below.
+
+```{bash, eval=FALSE}
+wget https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_humanLung_Cancer_FFPE/Xenium_V1_humanLung_Cancer_FFPE_outs.zip
+unzip Xenium_V1_humanLung_Cancer_FFPE_outs.zip
+```
+
+We will first load in the dataset and create the Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. Note that this dataset is of moderate size. Installing `arrow` will permit you to load the data from Parquet files, which is much more efficient than from CSV files.
+
+```{r, results='hide'}
+path <- "~/yard/data/Xenium_V1_humanLung_Cancer_FFPE_outs"
+# Load the Xenium data
+xenium.obj <- LoadXenium(path, fov = "fov")
+# remove cells with 0 counts
+xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)
+```
+
 # Mouse Brain: 10x Genomics Xenium In Situ
 
 In this section we'll analyze data produced by the Xenium platform. The vignette demonstrates how to load the per-transcript location data, cell x gene matrix, cell segmentation, and cell centroid information available in the Xenium outputs. The resulting Seurat object will contain the gene expression profile of each cell, the centroid and boundary of each cell, and the location of each individual detected transcript. The per-cell gene expression profiles are similar to standard single-cell RNA-seq and can be analyzed using the same tools.
@@ -223,11 +259,13 @@ xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)
 Spatial information is loaded into slots of the Seurat object, labelled by the name of "field of view" (FOV) being loaded. Initially all the data is loaded into the FOV named `fov`. Later, we will make a cropped FOV that zooms into a region of interest.
 
 Standard QC plots provided by Seurat are available via the `Xenium` assay. Here are violin plots of genes per cell (`nFeature_Xenium`) and transcript counts per cell (`nCount_Xenium`)
+
 ```{r vlnplot.xenium}
 VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0)
 ```
 
 Next, we plot the positions of the pan-inhibitory neuron marker Gad1, inhibitory neuron sub-type markers Pvalb, and Sst, and astrocyte marker Gfap on the tissue using `ImageDimPlot()`.
+
 ```{r p2.xenium, fig.width=10, fig.height=8}
 ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfap"), nmols = 20000)
 ```
@@ -238,11 +276,13 @@ ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 5, width =
 ```
 
 Here we visualize the expression level of some key layer marker genes at the per-cell level using `ImageFeaturePlot()` which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. We manually adjust the `max.cutoff` for each gene to roughly the 90th percentile (which can be specified with `max.cutoff='q90'`) of it's count distribution to improve contrast.
+
 ```{r mat.xenium, message=FALSE, warning=FALSE, fig.width=12, fig.height=12}
 ImageFeaturePlot(xenium.obj, features = c("Cux2", "Rorb", "Bcl11b", "Foxp2"), max.cutoff = c(25, 35, 12, 10), size = 0.75, cols = c("white", "red"))
 ```
 
 We can zoom in on a chosen area with the `Crop()` function. Once zoomed-in, we can visualize cell segmentation boundaries along with individual molecules.
+
 ```{r cropping.xenium, message=FALSE, warning=FALSE, fig.width=10, fig.height=8}
 cropped.coords <- Crop(xenium.obj[["fov"]], x = c(1200, 2900), y = c(3750, 4550), coords = "plot")
 xenium.obj[["zoom"]] <- cropped.coords
@@ -255,6 +295,7 @@ ImageDimPlot(xenium.obj, fov = "zoom",
 ```
 
 Next, we use SCTransform for normalization followed by standard dimensionality reduction and clustering. This step takes about 5 minutes from start to finish.
+
 ```{r unsupervised.xenium, results='hide'}
 xenium.obj <- SCTransform(xenium.obj, assay = "Xenium")
 xenium.obj <- RunPCA(xenium.obj, npcs = 30, features = rownames(xenium.obj))
@@ -264,22 +305,24 @@ xenium.obj <- FindClusters(xenium.obj, resolution = 0.3)
 ```
 
 We can then visualize the results of the clustering by coloring each cell according to its cluster either in UMAP space with `DimPlot()` or overlaid on the image with `ImageDimPlot()`.
+
 ```{r umap.xenium, fig.width=10, fig.height=7}
 DimPlot(xenium.obj)
 ```
 
 We can visualize the expression level of the markers we looked at earlier on the UMAP coordinates.
+
 ```{r features.xenium, fig.width=8, fig.height=10}
 FeaturePlot(xenium.obj, features = c("Cux2", "Bcl11b", "Foxp2", "Gad1", "Sst", "Gfap"))
 ```
 
 We can now use `ImageDimPlot()` to color the cell positions colored by the cluster labels determined in the previous step.
+
 ```{r clusters.xenium, fig.width=13, fig.height=13}
 ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75)
 ```
 
-Using the positional information of each cell, we compute spatial niches.
-We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. The Allen Brain reference can be installed [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1).
+Using the positional information of each cell, we compute spatial niches. We use a cortex reference from the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. The Allen Brain reference can be downloaded [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1).
 
 Below, we use Slc17a7 expression to help determine the cortical region.
 
@@ -355,7 +398,7 @@ keep.cells <- Cells(xenium.obj)[!is.na(xenium.obj$predicted.celltype)]
 xenium.obj <- subset(xenium.obj, cells = keep.cells)
 ```
 
-While the previous analyses consider each cell independently, spatial data enables cells to be defined not just by their neighborhood, but also by their broader spatial context. In Seurat v5, we introduce support for 'niche' analysis of spatial data, which demarcates regions of tissue ('niches'), each of which is defined by a different composition of spatially adjacent cell types. Inspired by methods in [Goltsev et al, Cell 2018](https://www.sciencedirect.com/science/article/pii/S0092867418309048) and [He et al, NBT 2022](https://www.nature.com/articles/s41587-022-01483-z), we consider the 'local neighborhood' for each cell - consisting of its `k.neighbor` spatially closest neighbors, and count the occurrences of each cell type present in this neighborhood. We then use k-means clustering to group cells that have similar neighborhoods together, into spatial niches. 
+While the previous analyses consider each cell independently, spatial data enables cells to be defined not just by their neighborhood, but also by their broader spatial context. In Seurat v5, we introduce support for 'niche' analysis of spatial data, which demarcates regions of tissue ('niches'), each of which is defined by a different composition of spatially adjacent cell types. Inspired by methods in [Goltsev et al, Cell 2018](https://www.sciencedirect.com/science/article/pii/S0092867418309048) and [He et al, NBT 2022](https://www.nature.com/articles/s41587-022-01483-z), we consider the 'local neighborhood' for each cell - consisting of its `k.neighbor` spatially closest neighbors, and count the occurrences of each cell type present in this neighborhood. We then use k-means clustering to group cells that have similar neighborhoods together, into spatial niches.
 
 We call the `BuildNicheAssay` function from within Seurat to construct a new assay called `niche` containing the cell type composition spatially neighboring each cell. A metadata column called `niches` is also returned, which contains cluster assignments based on the niche assay.
 
@@ -402,11 +445,11 @@ table(xenium.obj$predicted.celltype, xenium.obj$niches)
 
 # Human Lung: Nanostring CosMx Spatial Molecular Imager
 
-This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. 
+This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts.
 
-In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `LoadNanostring()` function, which parses the outputs available on the public download site. Note that the coordinates for the cell boundaries were provided by Nanostring by request, and are available for download [here](https://www.dropbox.com/s/hl3peavrx92bluy/Lung5_Rep1-polygons.csv?dl=0). 
+In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `LoadNanostring()` function, which parses the outputs available on the public download site. Note that the coordinates for the cell boundaries were provided by Nanostring by request, and are available for download [here](https://www.dropbox.com/s/hl3peavrx92bluy/Lung5_Rep1-polygons.csv?dl=0).
 
-For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0.  You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process.
+For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process.
 
 ```{r load}
 nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/vignette_data/nanostring/lung5_rep1", fov="lung5.rep1")
@@ -427,7 +470,7 @@ nano.obj <- SCTransform(nano.obj, assay = "Nanostring", clip.range = c(-10, 10),
 head(slot(object = nano.obj, name = "meta.data")[2:5])
 ```
 
-We can visualize the Nanostring cells and annotations, projected onto the reference-defined UMAP. Note that for this NSCLC sample, tumor samples are annotated as 'basal', which is the closest cell type match in the healthy reference. 
+We can visualize the Nanostring cells and annotations, projected onto the reference-defined UMAP. Note that for this NSCLC sample, tumor samples are annotated as 'basal', which is the closest cell type match in the healthy reference.
 
 ```{r, fig.width=9, fig.height=4}
 DimPlot(nano.obj)
@@ -491,8 +534,7 @@ ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", alpha = 0.3, molecule
 
 This dataset was produced using Akoya CODEX system. The CODEX system performs multiplexed spatially-resolved protein profiling, iteratively visualizing antibody-binding events. The dataset here represents a tissue section from a human lymph node, and was generated by the University of Florida as part of the Human Biomolecular Atlas Program (HuBMAP). More information about the sample and experiment is available [here](https://portal.hubmapconsortium.org/browse/dataset/c95d9373d698faf60a66ffdc27499fe1). The protein panel in this dataset consists of 28 markers, and protein intensities were quantified as part of the Akoya processor pipeline, which outputs a CSV file providing the intensity of each marker in each cell, as well as the cell coordinates. The file is available for public download via Globus [here](https://app.globus.org/file-manager?origin_id=af603d86-eab9-4eec-bb1d-9d26556741bb&origin_path=%2Fc95d9373d698faf60a66ffdc27499fe1%2Fdrv_CX_20-008_lymphnode_n10_reg001%2Fprocessed_2020-12-2320-008LNn10r001%2Fsegm%2Fsegm-1%2Ffcs%2Fcompensated%2F).
 
-
-First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function in Seurat: 
+First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function in Seurat:
 
 ```{r}
 codex.obj <- LoadAkoya(
@@ -524,7 +566,7 @@ DimPlot(codex.obj, label = TRUE, label.box = TRUE) + NoLegend()
 ImageDimPlot(codex.obj, cols = "parade")
 ```
 
-The expression patters of individual markers clearly denote different cell types and spatial structures, including Lyve1 (lymphatic endothelial cells), CD34 (blood endothelial cells), and CD21 (B cells). As expected, endothelial cells group together into vessels, and B cells are key components of specialized microstructures known as germinal zones. You can read more about protein markers in this dataset, as well as cellular networks in human lynmphatic tissues, in this  [preprint](https://www.biorxiv.org/content/10.1101/2021.10.20.465151v1.full).
+The expression patterns of individual markers clearly denote different cell types and spatial structures, including Lyve1 (lymphatic endothelial cells), CD34 (blood endothelial cells), and CD21 (B cells). As expected, endothelial cells group together into vessels, and B cells are key components of specialized microstructures known as germinal zones. You can read more about protein markers in this dataset, as well as cellular networks in human lymphatic tissues, in this [preprint](https://www.biorxiv.org/content/10.1101/2021.10.20.465151v1.full).
 
 ```{r, fig.width=9, fig.height=8}
 p1 <- ImageFeaturePlot(codex.obj, fov = "HBM754.WKLP.262", features = c("CD34", "CD21", "Lyve1"), min.cutoff = "q10", max.cutoff = "q90")
@@ -535,10 +577,13 @@ p1 + p2
 Each of these datasets represents an opportunity to learn organizing principles that govern the spatial localization of different cell types. Stay tuned for future updates to Seurat enabling further exploration and characterization of the relationship between spatial position and molecular state.
 
 <details>
-  <summary>**Session Info**</summary>
+
+<summary>**Session Info**</summary>
+
 ```{r}
 sessionInfo()
 ```
+
 </details>
 
 ```{r save.times, include=FALSE}

From d55be3ab6a2196fb1e7183d6b3da6793bea52db6 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Thu, 29 Feb 2024 16:02:21 -0800
Subject: [PATCH 003/166] clean up the API a bit

---
 R/convenience.R   | 76 +++++++++++++++++++++++++++++++++++++++--------
 R/preprocessing.R |  4 ++-
 2 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index c4cbed9f6..4e06343bc 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -175,31 +175,81 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
 #' @param data.dir Path to folder containing Nanostring SMI outputs
 #' @param fov FOV name
 #' @param assay Assay name
+#' @param mols.qv.threshold Remove transcript molecules with
+#' a QV less than this threshold. QV >= 20 is the standard threshold
+#' used to construct the cell x gene count matrix.
+#' @param cell.centroids Whether or not to load cell centroids
+#' @param molecule.coordinates Whether or not to load molecule pixel coordinates
+#' @param segmentations One of "cell", "nucleus" or NULL (to load either cell
+#' segmentations, nucleus segmentations or neither)
 #'
 #' @importFrom SeuratObject Cells CreateCentroids CreateFOV
-#' CreateSegmentation CreateSeuratObject
+#' CreateSegmentation CreateSeuratObject CreateMolecules
 #'
 #' @export
 #'
 #' @rdname ReadXenium
 #'
-LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
+LoadXenium <- function(
+  data.dir,
+  fov = 'fov',
+  assay = 'Xenium',
+  mols.qv.threshold = 20,
+  cell.centroids = TRUE,
+  molecule.coordinates = TRUE,
+  segmentations = NULL
+) {
+  if(!is.null(segmentations) && !(segmentations %in% c('nucleus', 'cell'))) {
+    stop('segmentations must be NULL or one of "nucleus", "cell"')
+  }
+  
+  if(!cell.centroids && is.null(segmentations)) {
+    stop(
+      "Must load either centroids or cell/nucleus segmentations"
+    )
+  }
+  
   data <- ReadXenium(
     data.dir = data.dir,
-    type = c("centroids", "segmentations"),
+    type = c("centroids", "segmentations", "nucleus_segmentations")[
+      c(cell.centroids, isTRUE(segmentations == 'cell'), isTRUE(segmentations == 'nucleus'))
+    ],
+    outs = c("segmentation_method", "matrix", "microns")[
+      c(cell.centroids || isTRUE(segmentations != 'nucleus'), TRUE, molecule.coordinates && (cell.centroids || !is.null(segmentations)))
+    ],
+    mols.qv.threshold = mols.qv.threshold
   )
+  
+  segmentations <- intersect(c("segmentations", "nucleus_segmentations"), names(data))
 
-  segmentations.data <- list(
-    "centroids" = CreateCentroids(data$centroids),
-    "segmentation" = CreateSegmentation(data$segmentations)
-  )
+  segmentations.data <- Filter(Negate(is.null), list(
+    centroids = if(is.null(data$centroids)) {
+      NULL
+    } else {
+      CreateCentroids(data$centroids)
+    },
+    segmentations = if(length(segmentations) > 0) {
+      CreateSegmentation(
+        data[[segmentations]]
+      )
+    } else {
+      NULL
+    }
+  ))
   
-  coords <- CreateFOV(
-    coords = segmentations.data,
-    type = c("segmentation", "centroids"),
-    molecules = data$microns,
-    assay = assay
-  )
+  coords <- if(length(segmentations.data) > 0) {
+    CreateFOV(
+      segmentations.data,
+      assay = assay,
+      molecules = if(is.null(data$microns)) {
+        NULL
+      } else {
+        CreateMolecules(data$microns)
+      }
+    )
+  } else {
+    NULL
+  }
   
   slot.map <- c(
     `Blank Codeword` = 'BlankCodeword',
diff --git a/R/preprocessing.R b/R/preprocessing.R
index 58561fca9..44d02f1a8 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2102,13 +2102,15 @@ ReadNanostring <- function(
 #' \itemize{
 #'  \item \dQuote{matrix}: the counts matrix
 #'  \item \dQuote{microns}: molecule coordinates
-#'  \item \dQuote{segmentation_method}: molecule coordinates
+#'  \item \dQuote{segmentation_method}: cell segmentation method (for runs which
+#'  use multi-modal segmentation)
 #' }
 #' @param type Type of cell spatial coordinate matrices to read; choose one
 #' or more of:
 #' \itemize{
 #'  \item \dQuote{centroids}: cell centroids in pixel coordinate space
 #'  \item \dQuote{segmentations}: cell segmentations in pixel coordinate space
+#'  \item \dQuote{nucleus_segmentations}: nucleus segmentations in pixel coordinate space
 #' }
 #' @param mols.qv.threshold Remove transcript molecules with
 #' a QV less than this threshold. QV >= 20 is the standard threshold

From 87396254d59ae9f8cdcfdfcbc11b62ff2bf4c110 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Fri, 1 Mar 2024 15:26:45 -0800
Subject: [PATCH 004/166] bulk out vignette

---
 R/convenience.R                          |  6 +-
 R/preprocessing.R                        | 34 ++++++++---
 vignettes/seurat5_spatial_vignette_2.Rmd | 78 +++++++++++++++++++++---
 3 files changed, 100 insertions(+), 18 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 4e06343bc..0170fad31 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -197,7 +197,8 @@ LoadXenium <- function(
   mols.qv.threshold = 20,
   cell.centroids = TRUE,
   molecule.coordinates = TRUE,
-  segmentations = NULL
+  segmentations = NULL,
+  flip.xy = FALSE
 ) {
   if(!is.null(segmentations) && !(segmentations %in% c('nucleus', 'cell'))) {
     stop('segmentations must be NULL or one of "nucleus", "cell"')
@@ -217,7 +218,8 @@ LoadXenium <- function(
     outs = c("segmentation_method", "matrix", "microns")[
       c(cell.centroids || isTRUE(segmentations != 'nucleus'), TRUE, molecule.coordinates && (cell.centroids || !is.null(segmentations)))
     ],
-    mols.qv.threshold = mols.qv.threshold
+    mols.qv.threshold = mols.qv.threshold,
+    flip.xy = flip.xy
   )
   
   segmentations <- intersect(c("segmentations", "nucleus_segmentations"), names(data))
diff --git a/R/preprocessing.R b/R/preprocessing.R
index 44d02f1a8..4f6171830 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2136,7 +2136,8 @@ ReadXenium <- function(
   data.dir,
   outs = c("segmentation_method", "matrix", "microns"),
   type = "centroids",
-  mols.qv.threshold = 20
+  mols.qv.threshold = 20,
+  flip.xy = F
 ) {
   # Argument checking
   type <- match.arg(
@@ -2230,7 +2231,11 @@ ReadXenium <- function(
           amount = 0
         )
         
-        col.use <- c(x_centroid = 'x', y_centroid = 'y', cell_id = 'cell')
+        col.use <- c(
+          x_centroid = letters[24 + flip.xy],
+          y_centroid = letters[25 - flip.xy],
+          cell_id = 'cell'
+        )
         
         for(option in Filter(function(x) x$req, list(
           list(
@@ -2281,7 +2286,11 @@ ReadXenium <- function(
           if(!inherits(cell_boundaries_df, "try-error")) { break }
         }
         
-        colnames(cell_boundaries_df) <- c('cell', 'x', 'y')
+        colnames(cell_boundaries_df) <- c(
+          'cell',
+          letters[24 + flip.xy],
+          letters[25 - flip.xy]
+        )
         
         psegs(type = "finish")
         
@@ -2312,7 +2321,11 @@ ReadXenium <- function(
           if(!inherits(nucleus_boundaries_df, "try-error")) { break }
         }
         
-        colnames(nucleus_boundaries_df) <- c('cell', 'x', 'y')
+        colnames(nucleus_boundaries_df) <- c(
+          'cell',
+          letters[24 + flip.xy],
+          letters[25 - flip.xy]
+        )
         
         psegs(type = "finish")
         
@@ -2326,7 +2339,11 @@ ReadXenium <- function(
           amount = 0
         )
 
-        col.use = c(x_location = 'x', y_location = 'y', feature_name = 'gene')
+        col.use = c(
+          x_location = letters[24+flip.xy],
+          y_location = letters[25-flip.xy],
+          feature_name = 'gene'
+        )
         
         for(option in Filter(function(x) x$req, list(
           list(
@@ -2363,10 +2380,9 @@ ReadXenium <- function(
       intersect(
         names(meta),
         c(
-          'run_name', 'run_start_time', 'region_name',
-          'preservation_method', 'panel_name', 'panel_organism',
-          'panel_tissue_type', 'instrument_sw_version',
-          'segmentation_stain'
+          'run_start_time', 'preservation_method', 'panel_name',
+          'panel_organism', 'panel_tissue_type',
+          'instrument_sw_version', 'segmentation_stain'
         )
       )
     ]
diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd
index 88395262f..d63f6b4cf 100644
--- a/vignettes/seurat5_spatial_vignette_2.Rmd
+++ b/vignettes/seurat5_spatial_vignette_2.Rmd
@@ -218,23 +218,87 @@ ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], c
 
 This dataset is a preview of the Xenium multimodal cell segmentation solution using a development version of the assay user guide and analysis software. It uses the [Xenium Human Multi-Tissue and Cancer Panel](https://www.10xgenomics.com/support/in-situ-gene-expression/documentation/steps/panel-design/pre-designed-xenium-gene-expression-panels) (377 genes) which was pre-designed by 10x Genomics. In this vignette, we will demonstrate how to load Xenium data for analysis and visualization using Seurat and, in particular, how to parse and visualize cell metadata. Note that this vignette requires the use of some optional dependencies in Seurat, namely `stars`, `jsonlite` and `gmp` in order to read data from `.zarr` files.
 
-This uses the full Xenium output bundle available from the [FFPE Human Lung Cancer with Xenium Multimodal Cell Segmentation Preview Data](https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard) page, which can be downloaded as described below.
+This uses the full Xenium output bundle available from the [FFPE Human Lung Cancer with Xenium Multimodal Cell Segmentation Preview Data](https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard) page, which can be downloaded as described below (note that this file is \~7 GB).
 
 ```{bash, eval=FALSE}
 wget https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_humanLung_Cancer_FFPE/Xenium_V1_humanLung_Cancer_FFPE_outs.zip
 unzip Xenium_V1_humanLung_Cancer_FFPE_outs.zip
 ```
 
-We will first load in the dataset and create the Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. Note that this dataset is moderate size. Installing `arrow` will permit you to load the data from Parquet files, which is much more efficient than from csv.
+We will first load in the dataset and create the Seurat object. We will flip the x/y coordinates for more convenient plotting. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. Installing `arrow` will permit you to load the data from Parquet files, which is much more efficient than from csv.
+
+By default, the subcellular coordinates of each Q20 transcript will be loaded, as well as the cell centroids, which can commonly take up more than 1 GB of RAM.
 
 ```{r, results='hide'}
-path <- "~/yard/data/Xenium_V1_humanLung_Cancer_FFPE_outs"
-# Load the Xenium data
-xenium.obj <- LoadXenium(path, fov = "fov")
+path <- "/brahms/hartmana/vignette_data/Xenium_V1_humanLung_Cancer_FFPE_outs"
+# Load the Xenium data, including cell segmentations
+xenium.obj <- LoadXenium(path, fov = "fov", segmentations = "cell", flip.xy = T)
 # remove cells with 0 counts
 xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)
 ```
 
+This dataset uses Xenium multimodal segmentation, which involves custom deep learning models trained on Xenium In Situ data. After nuclei segmentation with DAPI, the algorithm uses three methods to segment cells. The segmentation results for each cell are prioritized in this order:
+
+1.  **Cell boundary stain:** This is the most reliable method. Antibodies target epithelial markers (ATP1A1, E-Cadherin) and an immune marker (pan-lymphocyte: CD45). It can split nuclei and define cells missing a nucleus. Nuclei that overlap with anucleate cells are assigned to the cell.
+
+2.  **Expansion from the nucleus to the cell interior stain edge:** This method requires both segmented nuclei and the interior stain (18S rRNA marker)
+
+3.  **Nuclear expansion:** For cells that have neither boundary nor interior stains, segment cells with a nuclear (DAPI) expansion distance of 5 µm or until another cell boundary is encountered
+
+We can directly visualize cells which were segmented according to each method.
+
+```{r}
+ImageDimPlot(xenium.obj, fov = "fov", dark.background = F, group.by = "segmentation_method", cols = c('#ffabc3', '#a9a900', '#a9ceff'))
+```
+
+It is also possible to load and visualize the unsupervised cluster annotations computed by the Xenium Onboard Analysis pipeline, which are stored in the `analysis` folder of an output bundle.
+
+```{r}
+where <- tempdir()
+untar(file.path(path, 'analysis.tar.gz'), exdir = where)
+
+graph_clusters <- read.csv(file.path(where, 'analysis', 'clustering', 'gene_expression_graphclust', 'clusters.csv'), row.names = 'Barcode')
+
+# Store the graph-based clusters in the metadata
+xenium.obj <- AddMetaData(xenium.obj, graph_clusters)
+
+ImageDimPlot(xenium.obj, fov = "fov", dark.background = F, group.by = "Cluster")
+```
+
+Differential expression results from Xenium Onboard Analysis can also be loaded in a similar fashion.
+
+```{r}
+diff_exp <- read.csv(file.path(where, 'analysis', 'diffexp', 'gene_expression_graphclust', 'differential_expression.csv'))
+
+diff_exp <- melt(diff_exp, id.vars = c("Feature.ID", "Feature.Name"))
+
+colnames(diff_exp)[1:2] <- c('ensembl_id', 'gene_name')
+diff_exp$cluster <- unlist(lapply(strsplit(as.character(diff_exp$variable), '.', fixed = T), '[[', 2))
+diff_exp$measure <- factor(gsub('Cluster\\.\\d+\\.', '', as.character(diff_exp$variable)), c('Mean.Counts', 'Log2.fold.change', 'Adjusted.p.value'), c('mean_count', 'log2_fc', 'p_adj'))
+diff_exp$variable <- NULL
+
+diff_exp <- dcast(diff_exp, ensembl_id + gene_name + cluster ~ measure)
+
+significant_de <- subset(diff_exp, p_adj <= 0.05)
+significant_de <- significant_de[order(significant_de$mean_count, decreasing = T), ]
+significant_de[!duplicated(significant_de$cluster), ]
+```
+
+We will zoom in to visualize cell segmentations and expression of a select few marker genes.
+
+```{r}
+cropped.coords <- Crop(xenium.obj[["fov"]], x = c(6700, 7400), y = c(1500, 2000), coords = "plot")
+xenium.obj[["zoom"]] <- cropped.coords
+# visualize cropped area with cell segmentations & selected molecules
+DefaultBoundary(xenium.obj[["zoom"]]) <- "segmentation"
+ImageDimPlot(xenium.obj, fov = "zoom", group.by = "Cluster",
+             axes = TRUE, border.color = "white", border.size = 0.1,
+             cols = "polychrome", coord.fixed = FALSE,
+             molecules = c("SNTN", "MALL", "MS4A1", "IL7R", "CYP2B6"), nmols = 10000, mols.cols = RColorBrewer::brewer.pal(5, "Set3"), alpha = 0.4)
+```
+
+Lots of valuable data is output directly in each run, allowing for rapid interrogation of the biology. In the following vignette, we will see how we can use standard Seurat workflows to do more involved secondary analysis on Xenium data.
+
 # Mouse Brain: 10x Genomics Xenium In Situ
 
 In this section we'll analyze data produced by the Xenium platform. The vignette demonstrates how to load the per-transcript location data, cell x gene matrix, cell segmentation, and cell centroid information available in the Xenium outputs. The resulting Seurat object will contain the gene expression profile of each cell, the centroid and boundary of each cell, and the location of each individual detected transcript. The per-cell gene expression profiles are similar to standard single-cell RNA-seq and can be analyzed using the same tools.
@@ -246,12 +310,12 @@ wget https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_Co
 unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip
 ```
 
-First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute.
+First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute (you can improve this by installing `arrow` and `hdf5r`).
 
 ```{r load.xenium, results='hide'}
 path <- "/brahms/hartmana/vignette_data/xenium_tiny_subset"
 # Load the Xenium data
-xenium.obj <- LoadXenium(path, fov = "fov")
+xenium.obj <- LoadXenium(path, fov = "fov", segmentations = "cell")
 # remove cells with 0 counts
 xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)
 ```

From e0ad8f1d3e8c60235e40b10b7c99a422fe7ad391 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Mon, 18 Mar 2024 16:13:31 -0700
Subject: [PATCH 005/166] cleaner errors for missing files/arrow

---
 R/preprocessing.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 4f6171830..0631eacac 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2173,6 +2173,10 @@ ReadXenium <- function(
           if(!inherits(matrix, "try-error")) { break }
         }
         
+        if(!exists('matrix') || inherits(matrix, "try-error")) {
+          stop("Xenium outputs were incomplete: missing cell_feature_matrix")
+        }
+        
         pmtx(type = "finish")
         matrix
       },
@@ -2254,6 +2258,10 @@ ReadXenium <- function(
           if(!inherits(cell_info, "try-error")) { break }
         }
         
+        if(!exists('cell_info') || inherits(cell_info, "try-error")) {
+          stop("Xenium outputs were incomplete: missing cells")
+        }
+        
         cell_info <- cell_info[, names(col.use)]
         colnames(cell_info) <- col.use
         
@@ -2286,6 +2294,10 @@ ReadXenium <- function(
           if(!inherits(cell_boundaries_df, "try-error")) { break }
         }
         
+        if(!exists('cell_boundaries_df') || inherits(cell_boundaries_df, "try-error")) {
+          stop("Xenium outputs were incomplete: missing cell_boundaries")
+        }
+        
         colnames(cell_boundaries_df) <- c(
           'cell',
           letters[24 + flip.xy],
@@ -2321,6 +2333,10 @@ ReadXenium <- function(
           if(!inherits(nucleus_boundaries_df, "try-error")) { break }
         }
         
+        if(!exists('nucleus_boundaries_df') || inherits(nucleus_boundaries_df, "try-error")) {
+          stop("Xenium outputs were incomplete: missing nucleus_boundaries")
+        }
+        
         colnames(nucleus_boundaries_df) <- c(
           'cell',
           letters[24 + flip.xy],
@@ -2362,6 +2378,15 @@ ReadXenium <- function(
           if(!inherits(transcripts, "try-error")) { break }
         }
         
+        if(!exists('transcripts') || inherits(transcripts, "try-error")) {
+          hint <- ""
+          if(file.exists(file.path(data.dir, "transcripts.parquet"))) {
+            hint <- " Xenium outputs no longer include `transcripts.csv.gz`. Instead, please install `arrow` to read transcripts.parquet"
+          }
+          
+          stop(paste0("Xenium outputs were incomplete: missing transcripts.", hint))
+        }
+        
         transcripts <- transcripts[, names(col.use)]
         colnames(transcripts) <- col.use
         

From ae0616936142f05a82aabeaed66bee71d67aa046 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Thu, 2 May 2024 12:03:20 -0700
Subject: [PATCH 006/166] doc: update some documentation (#7)

---
 R/convenience.R | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 0170fad31..d32d732e8 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -172,7 +172,7 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
 
 #' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object
 #'
-#' @param data.dir Path to folder containing Nanostring SMI outputs
+#' @param data.dir Path to folder containing Xenium outputs
 #' @param fov FOV name
 #' @param assay Assay name
 #' @param mols.qv.threshold Remove transcript molecules with
@@ -182,6 +182,8 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
 #' @param molecule.coordinates Whether or not to load molecule pixel coordinates
 #' @param segmentations One of "cell", "nucleus" or NULL (to load either cell
 #' segmentations, nucleus segmentations or neither)
+#' @param flip.xy Whether or not to flip the x/y coordinates of the Xenium outputs
+#' to match what is displayed in Xenium Explorer, or fit on your screen better.
 #'
 #' @importFrom SeuratObject Cells CreateCentroids CreateFOV
 #' CreateSegmentation CreateSeuratObject CreateMolecules
@@ -203,13 +205,13 @@ LoadXenium <- function(
   if(!is.null(segmentations) && !(segmentations %in% c('nucleus', 'cell'))) {
     stop('segmentations must be NULL or one of "nucleus", "cell"')
   }
-  
+
   if(!cell.centroids && is.null(segmentations)) {
     stop(
       "Must load either centroids or cell/nucleus segmentations"
     )
   }
-  
+
   data <- ReadXenium(
     data.dir = data.dir,
     type = c("centroids", "segmentations", "nucleus_segmentations")[
@@ -221,7 +223,7 @@ LoadXenium <- function(
     mols.qv.threshold = mols.qv.threshold,
     flip.xy = flip.xy
   )
-  
+
   segmentations <- intersect(c("segmentations", "nucleus_segmentations"), names(data))
 
   segmentations.data <- Filter(Negate(is.null), list(
@@ -238,7 +240,7 @@ LoadXenium <- function(
       NULL
     }
   ))
-  
+
   coords <- if(length(segmentations.data) > 0) {
     CreateFOV(
       segmentations.data,
@@ -252,7 +254,7 @@ LoadXenium <- function(
   } else {
     NULL
   }
-  
+
   slot.map <- c(
     `Blank Codeword` = 'BlankCodeword',
     `Unassigned Codeword` = 'BlankCodeword',
@@ -261,15 +263,15 @@ LoadXenium <- function(
   )
 
   xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
-  
+
   if(!is.null(data$metadata)) {
     Misc(xenium.obj, 'run_metadata') <- data$metadata
   }
-  
+
   if(!is.null(data$segmentation_method)) {
     xenium.obj <- AddMetaData(xenium.obj, data$segmentation_method)
   }
-  
+
   for(name in intersect(names(slot.map), names(data$matrix))) {
     xenium.obj[[slot.map[name]]] <- CreateAssayObject(counts = data$matrix[[name]])
   }

From 1f1c5a04be44b67cfc046e1b46635ba24d62beac Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Tue, 7 May 2024 15:54:00 -0400
Subject: [PATCH 007/166] sketching updates

---
 R/sketching.R | 68 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 50 insertions(+), 18 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index 8bb260595..7d29ae715 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -49,6 +49,7 @@ SketchData <- function(
   seed = 123L,
   cast = 'dgCMatrix',
   verbose = TRUE,
+  features = NULL,
   ...
 ) {
   assay <- assay[1L] %||% DefaultAssay(object = object)
@@ -78,6 +79,7 @@ SketchData <- function(
       over.write = over.write,
       seed = seed,
       verbose = FALSE,
+      features = features,
       ...
     )
   } else if (method == 'Uniform') {
@@ -88,22 +90,43 @@ SketchData <- function(
   }
   leverage.score <- object[[var.name]]
   layers.data <- Layers(object = object[[assay]], search = 'data')
-  cells <- lapply(
-    X = seq_along(along.with = layers.data),
-    FUN = function(i, seed) {
-      set.seed(seed = seed)
-      lcells <- Cells(x = object[[assay]], layer = layers.data[i])
-      if (length(x = lcells) < ncells) {
-        return(lcells)
-      }
-      return(sample(
+  cells <- list()
+  for (i in seq_along(layers.data)){
+    set.seed(seed = seed) # does this need to be set in the forloop? is it getting updated somehow 
+    lyr <- layers.data[i]
+    if (length(ncells) == 1) { # use the same number of cells per layer 
+      ncells.lyr <- ncells
+    } else {
+      ncells.lyr <- ncells[i]
+    }
+    lcells <- Cells(x = object[[assay]], layer = lyr)
+    if (length(x = lcells) < ncells.lyr) {
+      cells[[i]] <- lcells
+    } else {
+      cells[[i]] <- sample(
         x = lcells,
-        size = ncells,
+        size = ncells.lyr,
         prob = leverage.score[lcells,]
-      ))
-    },
+      )
+    }
     seed = seed
-  )
+  }
+  # cells <- lapply(
+  #   X = seq_along(along.with = layers.data),
+  #   FUN = function(i, seed) {
+  #     set.seed(seed = seed)
+  #     lcells <- Cells(x = object[[assay]], layer = layers.data[i])
+  #     if (length(x = lcells) < ncells) {
+  #       return(lcells)
+  #     }
+  #     return(sample(
+  #       x = lcells,
+  #       size = ncells,
+  #       prob = leverage.score[lcells,]
+  #     ))
+  #   },
+  #   seed = seed
+  # )
   sketched <- suppressWarnings(expr = subset(
     x = object[[assay]],
     cells = unlist(cells),
@@ -498,6 +521,7 @@ LeverageScore.StdAssay <- function(
   eps = 0.5,
   seed = 123L,
   verbose = TRUE,
+  features = NULL,
   ...
 ) {
   layer <- unique(x = layer) %||% DefaultLayer(object = object)
@@ -513,15 +537,21 @@ LeverageScore.StdAssay <- function(
     if (isTRUE(x = verbose)) {
       message("Running LeverageScore for layer ", l)
     }
+    features <- features %||% tryCatch({
+      VariableFeatures(
+        object = object,
+        method = vf.method,
+        layer = l
+      )
+    }, error = function(e) {
+      stop("Unable to get Variable Features from layer ", l, ". Try providing `features` argument instead.")
+    })
+    
     scores[Cells(x = object, layer = l), 1] <- LeverageScore(
       object = LayerData(
         object = object,
         layer = l,
-        features = VariableFeatures(
-          object = object,
-          method = vf.method,
-          layer = l
-        ),
+        features = features,
         fast = TRUE
       ),
       nsketch = nsketch,
@@ -572,6 +602,7 @@ LeverageScore.Seurat <- function(
   eps = 0.5,
   seed = 123L,
   verbose = TRUE,
+  features = NULL,
   ...
 ) {
   if (!over.write) {
@@ -590,6 +621,7 @@ LeverageScore.Seurat <- function(
     eps = eps,
     seed = seed,
     verbose = verbose,
+    features = features,
     ...
   )
   names(x = scores) <- var.name

From 96445cd0acb683b7b620750c613b2819515f2ace Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Tue, 7 May 2024 15:54:50 -0400
Subject: [PATCH 008/166] remove commented-out lapply sampling code in SketchData

---
 R/sketching.R | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index 7d29ae715..071d1cd42 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -111,22 +111,6 @@ SketchData <- function(
     }
     seed = seed
   }
-  # cells <- lapply(
-  #   X = seq_along(along.with = layers.data),
-  #   FUN = function(i, seed) {
-  #     set.seed(seed = seed)
-  #     lcells <- Cells(x = object[[assay]], layer = layers.data[i])
-  #     if (length(x = lcells) < ncells) {
-  #       return(lcells)
-  #     }
-  #     return(sample(
-  #       x = lcells,
-  #       size = ncells,
-  #       prob = leverage.score[lcells,]
-  #     ))
-  #   },
-  #   seed = seed
-  # )
   sketched <- suppressWarnings(expr = subset(
     x = object[[assay]],
     cells = unlist(cells),

From d72d54c71a8a00cd3cc711d5ebfea0ceadc66f21 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Tue, 7 May 2024 16:01:06 -0400
Subject: [PATCH 009/166] reword variable-features error in LeverageScore.StdAssay

---
 R/sketching.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sketching.R b/R/sketching.R
index 071d1cd42..6c0e46df1 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -528,7 +528,7 @@ LeverageScore.StdAssay <- function(
         layer = l
       )
     }, error = function(e) {
-      stop("Unable to get Variable Features from layer ", l, ". Try providing `features` argument instead.")
+      stop("Unable to get variable features from layer `", l, "`. Try providing `features` argument instead.")
     })
     
     scores[Cells(x = object, layer = l), 1] <- LeverageScore(

From c8ea915358b5b7aba9a69a21cbf7089240763223 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Thu, 9 May 2024 14:19:04 -0400
Subject: [PATCH 010/166] stop overwriting `features` argument in LeverageScore.StdAssay

---
 R/sketching.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index 6c0e46df1..0d51f5c36 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -521,7 +521,7 @@ LeverageScore.StdAssay <- function(
     if (isTRUE(x = verbose)) {
       message("Running LeverageScore for layer ", l)
     }
-    features <- features %||% tryCatch({
+    features.use <- features %||% tryCatch({
       VariableFeatures(
         object = object,
         method = vf.method,
@@ -535,7 +535,7 @@ LeverageScore.StdAssay <- function(
       object = LayerData(
         object = object,
         layer = l,
-        features = features,
+        features = features.use,
         fast = TRUE
       ),
       nsketch = nsketch,

From effa95538126f421383de02675e8cac89bd641a0 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Thu, 16 May 2024 14:09:41 -0400
Subject: [PATCH 011/166] update names of variables

---
 R/sketching.R | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index 0d51f5c36..80d3fafc1 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -89,23 +89,23 @@ SketchData <- function(
     object[[var.name]] <- 1
   }
   leverage.score <- object[[var.name]]
-  layers.data <- Layers(object = object[[assay]], search = 'data')
+  layer.names <- Layers(object = object[[assay]], search = 'data')
   cells <- list()
-  for (i in seq_along(layers.data)){
-    set.seed(seed = seed) # does this need to be set in the forloop? is it getting updated somehow 
-    lyr <- layers.data[i]
+  for (i in seq_along(layer.names)){
+    set.seed(seed = seed) 
+    layer.name <- layer.names[i]
     if (length(ncells) == 1) { # use the same number of cells per layer 
-      ncells.lyr <- ncells
+      ncells.layer <- ncells
     } else {
-      ncells.lyr <- ncells[i]
+      ncells.layer <- ncells[i]
     }
-    lcells <- Cells(x = object[[assay]], layer = lyr)
-    if (length(x = lcells) < ncells.lyr) {
+    lcells <- Cells(x = object[[assay]], layer = layer.name)
+    if (length(x = lcells) < ncells.layer) {
       cells[[i]] <- lcells
     } else {
       cells[[i]] <- sample(
         x = lcells,
-        size = ncells.lyr,
+        size = ncells.layer,
         prob = leverage.score[lcells,]
       )
     }
@@ -116,10 +116,10 @@ SketchData <- function(
     cells = unlist(cells),
     layers = Layers(object = object[[assay]], search = c('counts', 'data'))
   ))
-  for (lyr in layers.data) {
+  for (layer.name in layers.names) {
     try(
-      expr = VariableFeatures(object = sketched, method = "sketch", layer = lyr) <-
-        VariableFeatures(object = object[[assay]], layer = lyr),
+      expr = VariableFeatures(object = sketched, method = "sketch", layer = layer.name) <-
+        VariableFeatures(object = object[[assay]], layer = layer.name),
       silent = TRUE
     )
   }

From 58172fdd6b1d931d4dee2617cf4911aa54d45b4f Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Fri, 17 May 2024 16:57:44 -0400
Subject: [PATCH 012/166] allow named per-layer ncells vector in SketchData

---
 R/sketching.R | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index 80d3fafc1..a9fb547a5 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -90,20 +90,21 @@ SketchData <- function(
   }
   leverage.score <- object[[var.name]]
   layer.names <- Layers(object = object[[assay]], search = 'data')
+  if (length(ncells) == 1) {
+    ncells <- rep(ncells, length(layer.names))
+  }
+  if (is.null(names(ncells))) {
+    names(ncells) <- layer.names
+  }
   cells <- list()
-  for (i in seq_along(layer.names)){
+  for (layer.name in layer.names){
     set.seed(seed = seed) 
-    layer.name <- layer.names[i]
-    if (length(ncells) == 1) { # use the same number of cells per layer 
-      ncells.layer <- ncells
-    } else {
-      ncells.layer <- ncells[i]
-    }
+    ncells.layer <- ncells[[layer.name]]
     lcells <- Cells(x = object[[assay]], layer = layer.name)
     if (length(x = lcells) < ncells.layer) {
-      cells[[i]] <- lcells
+      cells[[layer.name]] <- lcells
     } else {
-      cells[[i]] <- sample(
+      cells[[layer.name]] <- sample(
         x = lcells,
         size = ncells.layer,
         prob = leverage.score[lcells,]
@@ -116,7 +117,7 @@ SketchData <- function(
     cells = unlist(cells),
     layers = Layers(object = object[[assay]], search = c('counts', 'data'))
   ))
-  for (layer.name in layers.names) {
+  for (layer.name in layer.names) {
     try(
       expr = VariableFeatures(object = sketched, method = "sketch", layer = layer.name) <-
         VariableFeatures(object = object[[assay]], layer = layer.name),

From 7238eae1d4f70e3feb506b9edcaef85399b5891a Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Fri, 17 May 2024 17:20:50 -0400
Subject: [PATCH 013/166] initializing tests

---
 tests/testthat/test_sketching.R | 70 +++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 tests/testthat/test_sketching.R

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
new file mode 100644
index 000000000..35e98749f
--- /dev/null
+++ b/tests/testthat/test_sketching.R
@@ -0,0 +1,70 @@
+# Tests for sketching related fxns
+set.seed(42)
+pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small))
+
+# Setup test object
+pbmc_small <- NormalizeData(pbmc_small, verbose = FALSE)
+pbmc_small <- FindVariableFeatures(pbmc_small)
+
+# Tests for SketchData
+# ------------------------------------------------------------------------------
+context("SketchData")
+
+test_that("SketchData defaults work", {
+  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+})
+
+test_that("SketchData with named list works", {
+  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = c("data" = 50), method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+})
+
+pbmc_split <- split(pbmc_small, f = pbmc_small$groups)
+pbmc_split <- FindVariableFeatures(pbmc_split)
+
+test_that("SketchData with multiple layers works", { # (and one is less than the number of cells in that layer)
+  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = 10, method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+})
+
+test_that("SketchData with a different number of cells per layer works", {
+  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+
+test_that("SketchData with a different number of cells per layer and a named list works", {
+  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c("data.a" = 50, "data.b" = 30), method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+
+test_that("SketchData with specified features works", {
+  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+
+test_that("SketchData with specified features and multiple layers works", {
+  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+
+pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small))
+pbmc_small <- NormalizeData(pbmc_small, verbose = FALSE)
+VariableFeatures(pbmc_small) <- rownames(pbmc_small)[1:100]
+test_that("SketchData when setting your own variable features and specifying features works", {
+  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+
+
+test_that("SketchData when setting your own variable features and not specifying features errors out", {
+  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")
+  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+})
+

From 88197888f74b45717dbb07e380abd8eead5429e3 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Mon, 20 May 2024 17:10:26 -0400
Subject: [PATCH 014/166] replace per-layer for loop with mapply in SketchData

---
 R/sketching.R | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/R/sketching.R b/R/sketching.R
index a9fb547a5..726a98e3c 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -96,22 +96,31 @@ SketchData <- function(
   if (is.null(names(ncells))) {
     names(ncells) <- layer.names
   }
-  cells <- list()
-  for (layer.name in layer.names){
-    set.seed(seed = seed) 
-    ncells.layer <- ncells[[layer.name]]
-    lcells <- Cells(x = object[[assay]], layer = layer.name)
-    if (length(x = lcells) < ncells.layer) {
-      cells[[layer.name]] <- lcells
-    } else {
-      cells[[layer.name]] <- sample(
-        x = lcells,
-        size = ncells.layer,
-        prob = leverage.score[lcells,]
-      )
-    }
-    seed = seed
-  }
+  # align index of `ncells` with `layer.names`
+  ncells <- ncells[layer.names]
+  cells <- mapply(
+    function(
+    layer.name,
+    ncells.layer
+    ) {
+      if (!is.null(seed)) {
+        set.seed(seed)
+      }
+      cells.layer <- Cells(object[[assay]], layer = layer.name)
+      if (length(cells.layer) < ncells.layer) {
+        cells.to.keep <- cells.layer
+      } else {
+        cells.to.keep <- sample(
+          x = cells.layer,
+          size = ncells.layer,
+          prob = leverage.score[cells.layer,]
+        )
+      }
+      return (cells.to.keep)
+    },
+    layer.names,
+    ncells
+  )
   sketched <- suppressWarnings(expr = subset(
     x = object[[assay]],
     cells = unlist(cells),

From 97f32fda24f63d6e8c332e538b6b2284c4c80c22 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Mon, 20 May 2024 18:07:39 -0400
Subject: [PATCH 015/166] finalizing tests

---
 tests/testthat/test_sketching.R | 75 +++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
index 35e98749f..878365a45 100644
--- a/tests/testthat/test_sketching.R
+++ b/tests/testthat/test_sketching.R
@@ -11,60 +11,79 @@ pbmc_small <- FindVariableFeatures(pbmc_small)
 context("SketchData")
 
 test_that("SketchData defaults work", {
-  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
+                              sketched.assay = "sketch"))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.9036446, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
+  pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = c("data" = 50), method = "LeverageScore", sketched.assay = "sketch"))
+  expect_equal(dim(pbmc_sketched_2[["sketch"]]), dim(pbmc_sketched[["sketch"]]))
+  expect_equal(as.numeric(pbmc_sketched_2$leverage.score[1]), as.numeric(pbmc_sketched$leverage.score[1]), tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched_2[["sketch"]])[1], colnames(pbmc_sketched[["sketch"]])[1])
 })
 
-test_that("SketchData with named list works", {
-  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = c("data" = 50), method = "LeverageScore", sketched.assay = "sketch")
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-})
 
-pbmc_split <- split(pbmc_small, f = pbmc_small$groups)
+pbmc_split <- suppressWarnings(merge(pbmc_small, pbmc_small))
+pbmc_split$groups <- sample(c("g1", "g2"), ncol(pbmc_split), replace = T)
+pbmc_split <- JoinLayers(pbmc_split)
+pbmc_split <- split(pbmc_split, f = pbmc_split$groups)
 pbmc_split <- FindVariableFeatures(pbmc_split)
 
 test_that("SketchData with multiple layers works", { # (and one is less than the number of cells in that layer)
-  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = 10, method = "LeverageScore", sketched.assay = "sketch")
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = 80, method = "LeverageScore", 
+                              sketched.assay = "sketch"))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,75))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,80))
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.4864473, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
 })
 
 test_that("SketchData with a different number of cells per layer works", {
-  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", sketched.assay = "sketch")
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
-})
-
-test_that("SketchData with a different number of cells per layer and a named list works", {
-  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c("data.a" = 50, "data.b" = 30), method = "LeverageScore", sketched.assay = "sketch")
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
+                              sketched.assay = "sketch"))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.4864473, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
+  pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c("data.g2" = 50, "data.g1" = 30), 
+                                method = "LeverageScore", sketched.assay = "sketch"))
+  expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g1), dim(pbmc_sketched[["sketch"]]$data.g1))
+  expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g2), dim(pbmc_sketched[["sketch"]]$data.g2))
+  expect_equal(as.numeric(pbmc_sketched_2$leverage.score[1]), as.numeric(pbmc_sketched$leverage.score[1]), 
+               tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched_2[["sketch"]])[1], colnames(pbmc_sketched[["sketch"]])[1])
 })
 
 test_that("SketchData with specified features works", {
-  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
+                              sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100]))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7202897, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
 })
 
 test_that("SketchData with specified features and multiple layers works", {
-  pbmc_sketched <- SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
+                              sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100]))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.4807881, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
 })
 
-pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small))
+pbmc_small <- CreateSeuratObject(pbmc_small[["RNA"]]$counts)
 pbmc_small <- NormalizeData(pbmc_small, verbose = FALSE)
 VariableFeatures(pbmc_small) <- rownames(pbmc_small)[1:100]
 test_that("SketchData when setting your own variable features and specifying features works", {
-  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])
+  pbmc_sketched <- suppressWarnings(suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
+                                                                sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.9029593, tolerance = 1e-4)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
 })
 
 
 test_that("SketchData when setting your own variable features and not specifying features errors out", {
-  pbmc_sketched <- SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  table(pbmc3k_split_sketched@meta.data[colnames(pbmc3k_split_sketched[["sketch"]]),]$random)
+  expect_error(suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")))
 })
 

From 45053b6b7f00337e6af62ddeff491a5388b411ef Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Mon, 20 May 2024 18:39:06 -0400
Subject: [PATCH 016/166] use pbmc_small groups

---
 tests/testthat/test_sketching.R | 35 ++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
index 878365a45..6a182e563 100644
--- a/tests/testthat/test_sketching.R
+++ b/tests/testthat/test_sketching.R
@@ -24,27 +24,28 @@ test_that("SketchData defaults work", {
 
 
 pbmc_split <- suppressWarnings(merge(pbmc_small, pbmc_small))
-pbmc_split$groups <- sample(c("g1", "g2"), ncol(pbmc_split), replace = T)
-pbmc_split <- JoinLayers(pbmc_split)
-pbmc_split <- split(pbmc_split, f = pbmc_split$groups)
+pbmc_split <- suppressWarnings(split(pbmc_split, f = pbmc_split$groups))
 pbmc_split <- FindVariableFeatures(pbmc_split)
 
 test_that("SketchData with multiple layers works", { # (and one is less than the number of cells in that layer)
+  set.seed(42)
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = 80, method = "LeverageScore", 
                               sketched.assay = "sketch"))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,75))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,80))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.4864473, tolerance = 1e-6)
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,80))
+  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,72))
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.6290191, tolerance = 1e-6)
   expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
 })
 
 test_that("SketchData with a different number of cells per layer works", {
+  set.seed(42)
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
                               sketched.assay = "sketch"))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.4864473, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.6290191, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "GAACCTGATGAACC_1")
+  set.seed(42)
   pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c("data.g2" = 50, "data.g1" = 30), 
                                 method = "LeverageScore", sketched.assay = "sketch"))
   expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g1), dim(pbmc_sketched[["sketch"]]$data.g1))
@@ -63,27 +64,29 @@ test_that("SketchData with specified features works", {
 })
 
 test_that("SketchData with specified features and multiple layers works", {
+  set.seed(42)
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
                               sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100]))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.4807881, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7197844, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "GAACCTGATGAACC_1")
 })
 
-pbmc_small <- CreateSeuratObject(pbmc_small[["RNA"]]$counts)
-pbmc_small <- NormalizeData(pbmc_small, verbose = FALSE)
-VariableFeatures(pbmc_small) <- rownames(pbmc_small)[1:100]
+pbmc_new <- CreateSeuratObject(pbmc_small[["RNA"]]$counts)
+pbmc_new <- NormalizeData(pbmc_new, verbose = FALSE)
+VariableFeatures(pbmc_new) <- rownames(pbmc_new)[1:100]
 test_that("SketchData when setting your own variable features and specifying features works", {
-  pbmc_sketched <- suppressWarnings(suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
+  set.seed(42)
+  pbmc_sketched <- suppressWarnings(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", 
                                                                 sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.9029593, tolerance = 1e-4)
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7202896, tolerance = 1e-6)
   expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
 })
 
 
 test_that("SketchData when setting your own variable features and not specifying features errors out", {
-  expect_error(suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")))
+  expect_error(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")))
 })
 

From 05c5102219070d387f9143a6b2d5ca381c8b302d Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Mon, 3 Jun 2024 11:06:15 -0400
Subject: [PATCH 017/166] specify data layer in LeverageScore

---
 R/sketching.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/sketching.R b/R/sketching.R
index 726a98e3c..9fd68dce1 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -611,7 +611,7 @@ LeverageScore.Seurat <- function(
     ndims = ndims,
     method = method,
     vf.method = vf.method,
-    layer = layer,
+    layer = 'data',
     eps = eps,
     seed = seed,
     verbose = verbose,

From b0e81eed9a116544dc84afca960cebddfd9fde18 Mon Sep 17 00:00:00 2001
From: Gesmira <gesmiramolla@gmail.com>
Date: Mon, 3 Jun 2024 11:06:46 -0400
Subject: [PATCH 018/166] tests: pass set.seed = 42 to SketchData instead of calling set.seed()

---
 tests/testthat/test_sketching.R | 37 +++++++++++++++------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
index 6a182e563..dae43cea4 100644
--- a/tests/testthat/test_sketching.R
+++ b/tests/testthat/test_sketching.R
@@ -1,5 +1,4 @@
 # Tests for sketching related fxns
-set.seed(42)
 pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small))
 
 # Setup test object
@@ -12,7 +11,7 @@ context("SketchData")
 
 test_that("SketchData defaults work", {
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
-                              sketched.assay = "sketch"))
+                              sketched.assay = "sketch", set.seed = 42))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
   expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.9036446, tolerance = 1e-6)
   expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
@@ -28,26 +27,23 @@ pbmc_split <- suppressWarnings(split(pbmc_split, f = pbmc_split$groups))
 pbmc_split <- FindVariableFeatures(pbmc_split)
 
 test_that("SketchData with multiple layers works", { # (and one is less than the number of cells in that layer)
-  set.seed(42)
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = 80, method = "LeverageScore", 
-                              sketched.assay = "sketch"))
+                              sketched.assay = "sketch", set.seed = 42))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,80))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,72))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.6290191, tolerance = 1e-6)
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7471112, tolerance = 1e-6)
   expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
 })
 
 test_that("SketchData with a different number of cells per layer works", {
-  set.seed(42)
   pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
-                              sketched.assay = "sketch"))
+                              sketched.assay = "sketch", set.seed = 42))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]),  0.6290191, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "GAACCTGATGAACC_1")
-  set.seed(42)
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.6220129, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1],  "ATGCCAGAACGACT_1")
   pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c("data.g2" = 50, "data.g1" = 30), 
-                                method = "LeverageScore", sketched.assay = "sketch"))
+                                method = "LeverageScore", sketched.assay = "sketch", set.seed = 42))
   expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g1), dim(pbmc_sketched[["sketch"]]$data.g1))
   expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g2), dim(pbmc_sketched[["sketch"]]$data.g2))
   expect_equal(as.numeric(pbmc_sketched_2$leverage.score[1]), as.numeric(pbmc_sketched$leverage.score[1]), 
@@ -64,22 +60,22 @@ test_that("SketchData with specified features works", {
 })
 
 test_that("SketchData with specified features and multiple layers works", {
-  set.seed(42)
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
-                              sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100]))
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), 
+                              method = "LeverageScore", sketched.assay = "sketch",
+                              features = VariableFeatures(pbmc_small)[1:100], set.seed = 42))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
   expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7197844, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "GAACCTGATGAACC_1")
+  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7324468, tolerance = 1e-6)
+  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
 })
 
 pbmc_new <- CreateSeuratObject(pbmc_small[["RNA"]]$counts)
 pbmc_new <- NormalizeData(pbmc_new, verbose = FALSE)
 VariableFeatures(pbmc_new) <- rownames(pbmc_new)[1:100]
 test_that("SketchData when setting your own variable features and specifying features works", {
-  set.seed(42)
-  pbmc_sketched <- suppressWarnings(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", 
-                                                                sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100])))
+  pbmc_sketched <- suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, 
+                              method = "LeverageScore", sketched.assay = "sketch", 
+                              features = VariableFeatures(pbmc_small)[1:100], set.seed = 42))
   expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
   expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7202896, tolerance = 1e-6)
   expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
@@ -87,6 +83,7 @@ test_that("SketchData when setting your own variable features and specifying fea
 
 
 test_that("SketchData when setting your own variable features and not specifying features errors out", {
-  expect_error(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", sketched.assay = "sketch")))
+  expect_error(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", 
+                          sketched.assay = "sketch")))
 })
 

From 910b7dad9e8595f04372ddb2fdc2d31b8d7338bd Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 5 Jun 2024 12:00:13 -0400
Subject: [PATCH 019/166] Tidy test_sketching.R

---
 tests/testthat/test_sketching.R | 328 +++++++++++++++++++++++++-------
 1 file changed, 264 insertions(+), 64 deletions(-)

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
index dae43cea4..1ad31892e 100644
--- a/tests/testthat/test_sketching.R
+++ b/tests/testthat/test_sketching.R
@@ -1,89 +1,289 @@
-# Tests for sketching related fxns
-pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small))
+# setup shared test fixtures
+path_to_counts <- system.file("extdata", "pbmc_raw.txt", package = "Seurat")
+
+
+build_test_data <- function(multi_layer = FALSE) {  # shared Seurat fixture built from pbmc_raw.txt
+  counts <- read.table(path_to_counts, sep = "\t", row.names = 1)
+  counts <- as.sparse(as.matrix(counts))
+  # multi_layer = TRUE: pass two copies of the counts to CreateSeuratObject(); otherwise a single matrix
+  if (multi_layer) {
+    barcodes <- colnames(counts)
+
+    counts.layer1 <- counts
+    colnames(counts.layer1) <- paste0(barcodes, "_layer1")
+
+    counts.layer2 <- counts
+    colnames(counts.layer1) <- paste0(barcodes, "_layer2")
+    # NOTE(review): the line above renames counts.layer1 a second time, so counts.layer2 keeps the raw barcodes; probably meant counts.layer2 -- confirm first, the "_layer2" name expectations in the tests below rely on the current behavior
+    counts_layers <- c(
+      counts.layer1 = counts.layer1,
+      counts.layer2 = counts.layer2
+    )
+
+    test_data <- CreateSeuratObject(counts_layers)
+    
+  } else{
+    test_data <- CreateSeuratObject(counts)
+  }
+  # every fixture is normalized and has variable features computed before it is returned
+  test_data <- NormalizeData(test_data, verbose = FALSE)
+  test_data <- FindVariableFeatures(test_data, verbose = FALSE)
+
+  return (test_data)
+}
 
-# Setup test object
-pbmc_small <- NormalizeData(pbmc_small, verbose = FALSE)
-pbmc_small <- FindVariableFeatures(pbmc_small)
 
-# Tests for SketchData
-# ------------------------------------------------------------------------------
 context("SketchData")
 
 test_that("SketchData defaults work", {
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
-                              sketched.assay = "sketch", set.seed = 42))
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.9036446, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
-  pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = c("data" = 50), method = "LeverageScore", sketched.assay = "sketch"))
-  expect_equal(dim(pbmc_sketched_2[["sketch"]]), dim(pbmc_sketched[["sketch"]]))
-  expect_equal(as.numeric(pbmc_sketched_2$leverage.score[1]), as.numeric(pbmc_sketched$leverage.score[1]), tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched_2[["sketch"]])[1], colnames(pbmc_sketched[["sketch"]])[1])
-})
+  test_case <- build_test_data()
 
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = 50, 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]), 
+    c(230, 50)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.9036446, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1], 
+    "ATGCCAGAACGACT"
+  )
 
-pbmc_split <- suppressWarnings(merge(pbmc_small, pbmc_small))
-pbmc_split <- suppressWarnings(split(pbmc_split, f = pbmc_split$groups))
-pbmc_split <- FindVariableFeatures(pbmc_split)
+  result_2 <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = c("data" = 50), 
+      method = "LeverageScore", 
+      sketched.assay = "sketch"
+    )
+  )
+  expect_equal(
+    dim(result_2[["sketch"]]), 
+    dim(result[["sketch"]])
+  )
+  expect_equal(
+    as.numeric(result_2$leverage.score[1]), 
+    as.numeric(result$leverage.score[1]), 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result_2[["sketch"]])[1], 
+    colnames(result[["sketch"]])[1]
+  )
+})
 
 test_that("SketchData with multiple layers works", { # (and one is less than the number of cells in that layer)
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = 80, method = "LeverageScore", 
-                              sketched.assay = "sketch", set.seed = 42))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,80))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,72))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7471112, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
+  test_case <- build_test_data(multi_layer = TRUE)
+  
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = 80, 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer1), 
+    c(230, 80)
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer2), 
+    c(230, 80)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.7471112, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1], 
+    "ATGCCAGAACGACT_layer2"
+  )
 })
 
 test_that("SketchData with a different number of cells per layer works", {
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), method = "LeverageScore", 
-                              sketched.assay = "sketch", set.seed = 42))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.6220129, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1],  "ATGCCAGAACGACT_1")
-  pbmc_sketched_2 <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c("data.g2" = 50, "data.g1" = 30), 
-                                method = "LeverageScore", sketched.assay = "sketch", set.seed = 42))
-  expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g1), dim(pbmc_sketched[["sketch"]]$data.g1))
-  expect_equal(dim(pbmc_sketched_2[["sketch"]]$data.g2), dim(pbmc_sketched[["sketch"]]$data.g2))
-  expect_equal(as.numeric(pbmc_sketched_2$leverage.score[1]), as.numeric(pbmc_sketched$leverage.score[1]), 
-               tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched_2[["sketch"]])[1], colnames(pbmc_sketched[["sketch"]])[1])
+  test_case <- build_test_data(multi_layer = TRUE)
+  
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = c(50, 30), 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer1), 
+    c(230, 50)
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer2), 
+    c(230, 30)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.6220129, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1],  
+    "ATGCCAGAACGACT_layer2"
+  )
+
+  result_2 <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = c("data.layer1" = 50, "data.layer2" = 30), 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result_2[["sketch"]]$data.layer1), 
+    dim(result[["sketch"]]$data.layer1)
+  )
+  expect_equal(
+    dim(result_2[["sketch"]]$data.layer2), 
+    dim(result[["sketch"]]$data.layer2)
+  )
+  expect_equal(
+    as.numeric(result_2$leverage.score[1]), 
+    as.numeric(result$leverage.score[1]), 
+    tolerance = 1e-5
+  )
+  expect_equal(
+    colnames(result_2[["sketch"]])[1], 
+    colnames(result[["sketch"]])[1]
+  )
 })
 
 test_that("SketchData with specified features works", {
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_small, assay = "RNA", ncells = 50, method = "LeverageScore", 
-                              sketched.assay = "sketch", features = VariableFeatures(pbmc_small)[1:100]))
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7202897, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
+  test_case <- build_test_data()
+  
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = 50, 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      features = VariableFeatures(test_case)[1:100]
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]), 
+    c(230, 50)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.7202897, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1], 
+    "ATGCCAGAACGACT"
+  )
 })
 
 test_that("SketchData with specified features and multiple layers works", {
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_split, assay = "RNA", ncells = c(50, 30), 
-                              method = "LeverageScore", sketched.assay = "sketch",
-                              features = VariableFeatures(pbmc_small)[1:100], set.seed = 42))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g1), c(230,30))
-  expect_equal(dim(pbmc_sketched[["sketch"]]$data.g2), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7324468, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT_1")
+  test_case <- build_test_data(multi_layer = TRUE)
+
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = c(50, 30), 
+      method = "LeverageScore", 
+      sketched.assay = "sketch",
+      features = VariableFeatures(test_case)[1:100], 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer1), 
+    c(230, 50)
+  )
+  expect_equal(
+    dim(result[["sketch"]]$data.layer2), 
+    c(230, 30)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.7324468, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1], 
+    "ATGCCAGAACGACT_layer2"
+  )
 })
 
-pbmc_new <- CreateSeuratObject(pbmc_small[["RNA"]]$counts)
-pbmc_new <- NormalizeData(pbmc_new, verbose = FALSE)
-VariableFeatures(pbmc_new) <- rownames(pbmc_new)[1:100]
 test_that("SketchData when setting your own variable features and specifying features works", {
-  pbmc_sketched <- suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, 
-                              method = "LeverageScore", sketched.assay = "sketch", 
-                              features = VariableFeatures(pbmc_small)[1:100], set.seed = 42))
-  expect_equal(dim(pbmc_sketched[["sketch"]]), c(230,50))
-  expect_equal(as.numeric(pbmc_sketched$leverage.score[1]), 0.7202896, tolerance = 1e-6)
-  expect_equal(colnames(pbmc_sketched[["sketch"]])[1], "ATGCCAGAACGACT")
+  test_case <- build_test_data()
+  top_features <- VariableFeatures(test_case)[1:100]
+  VariableFeatures(test_case) <- top_features
+  
+  result <- suppressWarnings(
+    SketchData(
+      test_case, 
+      assay = "RNA", 
+      ncells = 50, 
+      method = "LeverageScore", 
+      sketched.assay = "sketch", 
+      features = top_features, 
+      set.seed = 42
+    )
+  )
+  expect_equal(
+    dim(result[["sketch"]]), 
+    c(230,50)
+  )
+  expect_equal(
+    as.numeric(result$leverage.score[1]), 
+    0.7202896, 
+    tolerance = 1e-6
+  )
+  expect_equal(
+    colnames(result[["sketch"]])[1], 
+    "ATGCCAGAACGACT"
+  )
 })
 
-
 test_that("SketchData when setting your own variable features and not specifying features errors out", {
-  expect_error(suppressWarnings(SketchData(pbmc_new, assay = "RNA", ncells = 50, method = "LeverageScore", 
-                          sketched.assay = "sketch")))
+  pbmc_new <- CreateSeuratObject(build_test_data()[["RNA"]]$counts)
+  pbmc_new <- NormalizeData(pbmc_new, verbose = FALSE)
+  VariableFeatures(pbmc_new) <- rownames(pbmc_new)[1:100]
+  # variable features are assigned by hand; FindVariableFeatures() is never run on pbmc_new
+  expect_error(
+    suppressWarnings(
+      SketchData(
+        pbmc_new, 
+        assay = "RNA", 
+        ncells = 50, 
+        method = "LeverageScore", 
+        sketched.assay = "sketch"
+      )
+    )
+  )
 })
-

From 35384f470901ec4a3a45daddf6721ff2b5bc9e0b Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 5 Jun 2024 12:19:02 -0400
Subject: [PATCH 020/166] Tweak values and tolerances for test_sketching.R

---
 tests/testthat/test_sketching.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/testthat/test_sketching.R b/tests/testthat/test_sketching.R
index 1ad31892e..2f724d036 100644
--- a/tests/testthat/test_sketching.R
+++ b/tests/testthat/test_sketching.R
@@ -55,7 +55,7 @@ test_that("SketchData defaults work", {
   expect_equal(
     as.numeric(result$leverage.score[1]), 
     0.9036446, 
-    tolerance = 1e-6
+    tolerance = 1e-5
   )
   expect_equal(
     colnames(result[["sketch"]])[1], 
@@ -109,7 +109,7 @@ test_that("SketchData with multiple layers works", { # (and one is less than the
   )
   expect_equal(
     as.numeric(result$leverage.score[1]), 
-    0.7471112, 
+    0.9036446, 
     tolerance = 1e-6
   )
   expect_equal(
@@ -141,8 +141,8 @@ test_that("SketchData with a different number of cells per layer works", {
   )
   expect_equal(
     as.numeric(result$leverage.score[1]), 
-    0.6220129, 
-    tolerance = 1e-6
+    0.9036446, 
+    tolerance = 1e-5
   )
   expect_equal(
     colnames(result[["sketch"]])[1],  
@@ -230,7 +230,7 @@ test_that("SketchData with specified features and multiple layers works", {
   )
   expect_equal(
     as.numeric(result$leverage.score[1]), 
-    0.7324468, 
+    0.7202896, 
     tolerance = 1e-6
   )
   expect_equal(

From 7d30dfd4926229fc7cb2e55b049af9f8654b37da Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 11 Jun 2024 07:18:56 -0400
Subject: [PATCH 021/166] Update ncells param description in SketchData

---
 R/sketching.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/sketching.R b/R/sketching.R
index 9fd68dce1..2a34b4081 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -20,7 +20,9 @@ NULL
 #'
 #' @param object A Seurat object.
 #' @param assay Assay name. Default is NULL, in which case the default assay of the object is used.
-#' @param ncells A positive integer indicating the number of cells to sample for the sketching. Default is 5000.
+#' @param ncells A positive integer or a named vector/list specifying the 
+#' number of cells to sample per layer. If a single integer is provided, the 
+#' same number of cells will be sampled from each layer. Default is 5000.
 #' @param sketched.assay Sketched assay name. A  sketch assay is created or overwrite with the sketch data. Default is 'sketch'.
 #' @param method  Sketching method to use. Can be 'LeverageScore' or 'Uniform'.
 #'               Default is 'LeverageScore'.

From fde3293786331fd55efacf3c1f35ef9ad4d74132 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 11 Jun 2024 07:20:20 -0400
Subject: [PATCH 022/166] Add features param description to LeverageScore

---
 R/sketching.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/sketching.R b/R/sketching.R
index 2a34b4081..c2e8535c5 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -498,6 +498,7 @@ LeverageScore.default <- function(
 #'            defaults to 0.5.
 #' @param seed A positive integer. The seed for the random number generator, defaults to 123.
 #' @param verbose Print progress and diagnostic messages
+#' @param features A vector of feature names to use for calculating leverage score. 
 #'
 #' @importFrom SeuratObject EmptyDF
 #'

From c51dad0c7b6467be73b2d3554c7eac4151820a6d Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 10 Jun 2024 17:34:56 -0400
Subject: [PATCH 023/166] Update NEWS

---
 NEWS.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 97514e05b..3a6fc74ec 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# Unreleased
+
+## Changes
+- Added `features` parameter to `LeverageScore` and `SketchData`
+- Updated `SketchData`'s `ncells` parameter to accept integer vector
+
 # Seurat 5.1.0 (2024-05-08)
 
 ## Changes

From 995920ffb96bff29c7c7b8c420a68579c4607d11 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 10 Jun 2024 17:35:31 -0400
Subject: [PATCH 024/166] Update docs

---
 man/LeverageScore.Rd | 5 +++++
 man/SketchData.Rd    | 5 ++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd
index 240d9d9c0..6b48c92fc 100644
--- a/man/LeverageScore.Rd
+++ b/man/LeverageScore.Rd
@@ -31,6 +31,7 @@ LeverageScore(object, ...)
   eps = 0.5,
   seed = 123L,
   verbose = TRUE,
+  features = NULL,
   ...
 )
 
@@ -44,6 +45,7 @@ LeverageScore(object, ...)
   eps = 0.5,
   seed = 123L,
   verbose = TRUE,
+  features = NULL,
   ...
 )
 
@@ -60,6 +62,7 @@ LeverageScore(object, ...)
   eps = 0.5,
   seed = 123L,
   verbose = TRUE,
+  features = NULL,
   ...
 )
 }
@@ -87,6 +90,8 @@ defaults to 0.5.}
 
 \item{layer}{layer to use}
 
+\item{features}{A vector of feature names to use for calculating leverage score.}
+
 \item{assay}{assay to use}
 
 \item{var.name}{name of slot to store leverage scores}
diff --git a/man/SketchData.Rd b/man/SketchData.Rd
index 62ad1e9cc..97ffc2d78 100644
--- a/man/SketchData.Rd
+++ b/man/SketchData.Rd
@@ -15,6 +15,7 @@ SketchData(
   seed = 123L,
   cast = "dgCMatrix",
   verbose = TRUE,
+  features = NULL,
   ...
 )
 }
@@ -23,7 +24,9 @@ SketchData(
 
 \item{assay}{Assay name. Default is NULL, in which case the default assay of the object is used.}
 
-\item{ncells}{A positive integer indicating the number of cells to sample for the sketching. Default is 5000.}
+\item{ncells}{A positive integer or a named vector/list specifying the 
+number of cells to sample per layer. If a single integer is provided, the 
+same number of cells will be sampled from each layer. Default is 5000.}
 
 \item{sketched.assay}{Sketched assay name. A  sketch assay is created or overwrite with the sketch data. Default is 'sketch'.}
 

From fc770fe9db7383801f0ba3752afb13ca60aaccae Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 10 Jun 2024 17:35:41 -0400
Subject: [PATCH 025/166] Bump version

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6e0bd1b18..984ce8c39 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 5.1.0
-Date: 2024-05-08
+Version: 5.1.0.9001
+Date: 2024-06-10
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From a9ca7f0f60b15e06448e53d9e81edfdf0d53a3ea Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Mon, 24 Jun 2024 09:44:54 -0700
Subject: [PATCH 026/166] fix: load genomic controls when present

---
 R/convenience.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/convenience.R b/R/convenience.R
index d32d732e8..c402cfc6f 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -259,7 +259,8 @@ LoadXenium <- function(
     `Blank Codeword` = 'BlankCodeword',
     `Unassigned Codeword` = 'BlankCodeword',
     `Negative Control Codeword` = 'ControlCodeword',
-    `Negative Control Probe` = 'ControlProbe'
+    `Negative Control Probe` = 'ControlProbe',
+    `Genomic Control` = 'GenomicControl'
   )
 
   xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)

From 97283d51790da22ec1b3300d08e8dd4a75b0d921 Mon Sep 17 00:00:00 2001
From: Ben Parks <bnprks+git@gmail.com>
Date: Fri, 31 May 2024 21:28:03 -0700
Subject: [PATCH 027/166] PrepDR5 with BPCells remove dense matrix conversion

I think this is the likely culprit for issue #8391
---
 R/dimensional_reduction.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 298118440..30c8b06d7 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2432,7 +2432,11 @@ PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRU
   if (!length(x = features)) {
     stop("No variable features, run FindVariableFeatures() or provide a vector of features", call. = FALSE)
   }
-  features.var <- apply(X = data.use, MARGIN = 1L, FUN = var)
+  if (is(data.use, "IterableMatrix")) {
+    features.var <- BPCells::matrix_stats(matrix=data.use, row_stats="variance")$row_stats["variance",]
+  } else {
+    features.var <- apply(X = data.use, MARGIN = 1L, FUN = var)
+  }
   features.keep <- features[features.var > 0]
   if (!length(x = features.keep)) {
     stop("None of the requested features have any variance", call. = FALSE)

From 0e0ed07b3e2ada932e67d6bb2619d3f8ff853dcc Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 1 Jul 2024 14:12:00 -0400
Subject: [PATCH 028/166] Update NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 3a6fc74ec..a6a5e9d9b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`
 - Updated `SketchData`'s `ncells` parameter to accept integer vector
 

From e7a202b6ef66e306ab708f6b3681873d371d0ef9 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 1 Jul 2024 14:12:27 -0400
Subject: [PATCH 029/166] Bump version

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 984ce8c39..a0b161ec6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 5.1.0.9001
-Date: 2024-06-10
+Version: 5.1.0.9002
+Date: 2024-07-02
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From b2022308f301d0a750bc665affdd637ce3a10475 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 10 Jan 2024 13:41:13 +0100
Subject: [PATCH 030/166] add `simplify = FALSE` to return df

..see issue #8265
---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 1de879390..85cad5327 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2409,7 +2409,7 @@ ReadXenium <- function(
       },
       stop("Unknown Xenium input type: ", otype)
     )
-  }, USE.NAMES = TRUE)
+  }, simplify = FALSE, USE.NAMES = TRUE)
   return(data)
 }
 

From 34c915813e8926e034e6657c03da6c305be63cda Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 22 Jul 2024 10:50:54 -0400
Subject: [PATCH 031/166] Update NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index a6a5e9d9b..c098ea0b8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Fixed `ReadXenium` to properly parse multiple molecular outputs at once ([#8265](https://github.com/satijalab/seurat/issues/8265))
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`
 - Updated `SketchData`'s `ncells` parameter to accept integer vector

From 18a41a05d2897d7d0c9edbc2c782b3fd394311de Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 22 Jul 2024 10:51:30 -0400
Subject: [PATCH 032/166] Bump version

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a0b161ec6..eee21fda5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 5.1.0.9002
-Date: 2024-07-02
+Version: 5.1.0.9003
+Date: 2024-07-22
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 0ea2c99066076d253952a6da5b0a3d5c762179ea Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 10 Jan 2024 13:39:09 -0800
Subject: [PATCH 033/166] quick fix

---
 R/convenience.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 5bea600fc..c4554addb 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -201,10 +201,11 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
   )
 
   xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
-  if("Blank Codeword" %in% names(data$matrix))
+  if("Blank Codeword" %in% names(data$matrix)) {
     xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]])
-  else
+  } else if("Unassigned Codeword" %in% names(data$matrix)) {
     xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Unassigned Codeword"]])
+  }
   xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]])
   xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]])
 

From a08e01d1f829db7c10f57e7fe0bb297e1d1b570a Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 22 Jul 2024 10:53:32 -0400
Subject: [PATCH 034/166] Update NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index c098ea0b8..d3b17bb2e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Fixed `LoadXenium` to accommodate datasets without "Blank Codeword" or "Unassigned Codeword" matrices
 - Fixed `ReadXenium` to properly parse multiple molecular outputs at once ([#8265](https://github.com/satijalab/seurat/issues/8265))
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`

From 8556048ed6a37001ab766b43fe338ea62119ca75 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 22 Jul 2024 10:53:51 -0400
Subject: [PATCH 035/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index eee21fda5..ddaf72db8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9003
+Version: 5.1.0.9004
 Date: 2024-07-22
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.

From 5649b270faf721d79e421d0497f77a15a99dc070 Mon Sep 17 00:00:00 2001
From: yuhanH <yh1970@nyu.edu>
Date: Thu, 18 Jul 2024 17:44:54 -0400
Subject: [PATCH 036/166] add bpcells for slsi

---
 R/dimensional_reduction.R | 49 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 30c8b06d7..2e4c1ca0b 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2669,7 +2669,12 @@ RunSLSI.default <- function(
   if (verbose) {
     message("Smoothing peaks matrix")
   }
-  object.smooth <- t(x = graph) %*% (t(x = object) %*% object) %*% graph
+  if (inherits(x = object, what = 'IterableMatrix')) {
+    t_object <- t(BPCells::transpose_storage_order(matrix = object))
+    object.smooth <- t(x = graph) %*% (t_object %*% object) %*% graph
+  } else {
+    object.smooth <- t(x = graph) %*% (t(x = object) %*% object) %*% graph
+  }
   if (verbose) {
     message("Performing eigendecomposition")
   }
@@ -2735,6 +2740,48 @@ RunSLSI.Assay <- function(
   return(reduction.data)
 }
 
+
+#' @param features Features to compute SLSI on. If features=NULL, SLSI will be run
+#' using the variable features for the Assay5.
+#' @param layer Layer to run SLSI on
+#'
+#' @rdname RunSLSI
+#' @concept dimensional_reduction
+#' @export
+#' @method RunSLSI Assay5
+#'
+RunSLSI.Assay5 <- function(
+    object,
+    assay = NULL,
+    features = NULL,
+    n = 50,
+    reduction.key = "SLSI_",
+    graph = NULL,
+    layer = "data",
+    verbose = TRUE,
+    seed.use = 42,
+    ...) {
+  data.use <- PrepDR5(
+    object = object,
+    features = features,
+    layer = layer,
+    verbose = verbose
+  )
+  reduction.data <- RunSLSI(
+    object = data.use,
+    assay = assay,
+    n = n, # forward the caller's `n`; this method has no `npcs` argument
+    reduction.key = reduction.key,
+    graph = graph,
+    verbose = verbose,
+    seed.use = seed.use,
+    ...
+  )
+  return(reduction.data)
+}
+
+
+
 #' @param reduction.name dimensional reduction name
 #' @rdname RunSLSI
 #' @concept dimensional_reduction

From da51eb47a2b3bbc43a716ed9c42c3f8912fb8794 Mon Sep 17 00:00:00 2001
From: yuhanH <yh1970@nyu.edu>
Date: Sat, 27 Jul 2024 14:46:27 -0400
Subject: [PATCH 037/166] update docu for slsi

---
 NAMESPACE           |  1 +
 man/RunSLSI.Rd      | 20 ++++++++++++++++++--
 src/RcppExports.cpp |  2 +-
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 005325e25..892a3568b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -106,6 +106,7 @@ S3method(RunPCA,Seurat5)
 S3method(RunPCA,StdAssay)
 S3method(RunPCA,default)
 S3method(RunSLSI,Assay)
+S3method(RunSLSI,Assay5)
 S3method(RunSLSI,Seurat)
 S3method(RunSLSI,default)
 S3method(RunSPCA,Assay)
diff --git a/man/RunSLSI.Rd b/man/RunSLSI.Rd
index 5b7a05ad6..1c8f81db6 100644
--- a/man/RunSLSI.Rd
+++ b/man/RunSLSI.Rd
@@ -4,6 +4,7 @@
 \alias{RunSLSI}
 \alias{RunSLSI.default}
 \alias{RunSLSI.Assay}
+\alias{RunSLSI.Assay5}
 \alias{RunSLSI.Seurat}
 \title{Run Supervised Latent Semantic Indexing}
 \usage{
@@ -32,6 +33,19 @@ RunSLSI(object, ...)
   ...
 )
 
+\method{RunSLSI}{Assay5}(
+  object,
+  assay = NULL,
+  features = NULL,
+  n = 50,
+  reduction.key = "SLSI_",
+  graph = NULL,
+  layer = "data",
+  verbose = TRUE,
+  seed.use = 42,
+  ...
+)
+
 \method{RunSLSI}{Seurat}(
   object,
   assay = NULL,
@@ -63,8 +77,10 @@ the number for the dimension names}
 
 \item{seed.use}{Set a random seed. Setting NULL will not set a seed.}
 
-\item{features}{Features to compute SLSI on. If NULL, SLSI will be run
-using the variable features for the Assay.}
+\item{features}{Features to compute SLSI on. If features=NULL, SLSI will be run
+using the variable features for the Assay5.}
+
+\item{layer}{Layer to run SLSI on}
 
 \item{reduction.name}{dimensional reduction name}
 }
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 7a3302c6b..540e5c2d8 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -402,7 +402,7 @@ BEGIN_RCPP
 END_RCPP
 }
 
-RcppExport SEXP isnull(SEXP);
+RcppExport SEXP isnull(void *);
 
 static const R_CallMethodDef CallEntries[] = {
     {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9},

From a0cd4dd64b45e1dd474e2e4ca934235f1b9ec7e5 Mon Sep 17 00:00:00 2001
From: Yuhan Hao <yh1970@nyu.edu>
Date: Sun, 18 Aug 2024 00:33:44 -0400
Subject: [PATCH 038/166] Update R/dimensional_reduction.R

Co-authored-by: David Collins <23369610+dcollins15@users.noreply.github.com>
---
 R/dimensional_reduction.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 2e4c1ca0b..41cc925d3 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2750,7 +2750,7 @@ RunSLSI.Assay <- function(
 #' @export
 #' @method RunSLSI Assay5
 #'
-RunSLSI.Assay5 <- function(
+RunSLSI.StdAssay <- function(
     object,
     assay = NULL,
     features = NULL,

From 982c1597c7ca563bd786e2c3e07f2dd998e99c36 Mon Sep 17 00:00:00 2001
From: yuhanH <yh1970@nyu.edu>
Date: Sun, 18 Aug 2024 00:40:13 -0400
Subject: [PATCH 039/166] update news

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index d3b17bb2e..b437053b9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Updated `RunSLSI` to support `BPCells` matrices
 - Fixed `LoadXenium` to accommodate datasets without "Blank Codeword" or "Unassigned Codeword" matrices
 - Fixed `ReadXenium` to properly parse multiple molecular outputs at once ([#8265](https://github.com/satijalab/seurat/issues/8265))
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices

From e1f97f68b261ad4588b58cfa9abab0f187f06e18 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 12 Sep 2024 18:04:31 -0400
Subject: [PATCH 040/166] Fixup docstring for RunSLSI.StdAssay

---
 R/dimensional_reduction.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 41cc925d3..4450054fe 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2748,7 +2748,7 @@ RunSLSI.Assay <- function(
 #' @rdname RunSLSI
 #' @concept dimensional_reduction
 #' @export
-#' @method RunSLSI Assay5
+#' @method RunSLSI StdAssay
 #'
 RunSLSI.StdAssay <- function(
     object,

From 539beb78257ec979e6c56f5530665e1609705af9 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 12 Sep 2024 16:34:33 -0400
Subject: [PATCH 041/166] Bump version

---
 DESCRIPTION | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ddaf72db8..e1e441a1f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 5.1.0.9004
-Date: 2024-07-22
+Version: 5.1.0.9005
+Date: 2024-09-12
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
@@ -114,7 +114,7 @@ Collate:
     'sketching.R'
     'tree.R'
     'utilities.R'
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Suggests:
     ape,

From c6876c12cbd13412a2f48005c900db9ce8a79076 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 12 Sep 2024 18:04:57 -0400
Subject: [PATCH 042/166] Update docs

---
 NAMESPACE           | 2 +-
 man/RunSLSI.Rd      | 4 ++--
 src/RcppExports.cpp | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 892a3568b..f50cd4712 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -106,8 +106,8 @@ S3method(RunPCA,Seurat5)
 S3method(RunPCA,StdAssay)
 S3method(RunPCA,default)
 S3method(RunSLSI,Assay)
-S3method(RunSLSI,Assay5)
 S3method(RunSLSI,Seurat)
+S3method(RunSLSI,StdAssay)
 S3method(RunSLSI,default)
 S3method(RunSPCA,Assay)
 S3method(RunSPCA,Assay5)
diff --git a/man/RunSLSI.Rd b/man/RunSLSI.Rd
index 1c8f81db6..65b79ad74 100644
--- a/man/RunSLSI.Rd
+++ b/man/RunSLSI.Rd
@@ -4,7 +4,7 @@
 \alias{RunSLSI}
 \alias{RunSLSI.default}
 \alias{RunSLSI.Assay}
-\alias{RunSLSI.Assay5}
+\alias{RunSLSI.StdAssay}
 \alias{RunSLSI.Seurat}
 \title{Run Supervised Latent Semantic Indexing}
 \usage{
@@ -33,7 +33,7 @@ RunSLSI(object, ...)
   ...
 )
 
-\method{RunSLSI}{Assay5}(
+\method{RunSLSI}{StdAssay}(
   object,
   assay = NULL,
   features = NULL,
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 540e5c2d8..7a3302c6b 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -402,7 +402,7 @@ BEGIN_RCPP
 END_RCPP
 }
 
-RcppExport SEXP isnull(void *);
+RcppExport SEXP isnull(SEXP);
 
 static const R_CallMethodDef CallEntries[] = {
     {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9},

From 41adcb48fc7e13a6359a98def41c726863d1ea0c Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Mon, 16 Sep 2024 14:59:25 -0700
Subject: [PATCH 043/166] Typo

---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 4a311d9ae..26203b185 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2575,7 +2575,7 @@ ReadXenium <- function(
       },
       stop("Unknown Xenium input type: ", otype)
     )
-  }, SIMPLIFY = FALSE, USE.NAMES = TRUE)
+  }, simplify = FALSE, USE.NAMES = TRUE)
   
   metadata <- file.path(data.dir, "experiment.xenium")
   if(file.exists(metadata) && requireNamespace("jsonlite", quietly = TRUE)) {

From 23a20374a849513982fd85c58ff363b089f56d6d Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 18 Sep 2024 10:28:54 -0700
Subject: [PATCH 044/166] fix: copypasta on nucleus_boundaries

---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 26203b185..9c91cb4f2 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2512,7 +2512,7 @@ ReadXenium <- function(
           if(!inherits(nucleus_boundaries_df, "try-error")) { break }
         }
         
-        if(!exists('cell_info') || inherits(cell_info, "try-error")) {
+        if(!exists('nucleus_boundaries_df') || inherits(nucleus_boundaries_df, "try-error")) {
           stop("Xenium outputs were incomplete: missing nucleus_boundaries")
         }
         

From 7454eb0488c1c352fa992360fda0c819322f39a9 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 18 Sep 2024 10:43:47 -0700
Subject: [PATCH 045/166] fix: don't load segmentation method on old datasets

---
 R/preprocessing.R | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 9c91cb4f2..899e6254c 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2376,12 +2376,18 @@ ReadXenium <- function(
           tempdir <- path.expand(tempdir())
           unzip(file.path(data.dir, "cells.zarr.zip"), exdir = tempdir)
           zattr <- jsonlite::read_json(file.path(tempdir, '.zattrs'))
+
+          # Segmentation method only available in datasets versioned 6.0+
+          if(zattr$major_version < 6) {
+            return(NULL)
+          }
+
           which_entry <- which(unlist(zattr$polygon_set_names) == 'cell')
           
-          indices <- stars::read_mdim(file.path(tempdir, 'polygon_sets', which_entry - 1, 'cell_index'))$cell_index + 1
+          indices <- stars::read_mdim(file.path(tempdir, "polygon_sets", which_entry - 1, "cell_index"))$cell_index + 1
           indices[is.na(indices)] <- 1
           
-          ids <- stars::read_mdim(file.path(tempdir, 'cell_id'))$cell_id
+          ids <- stars::read_mdim(file.path(tempdir, "cell_id"))$cell_id
           ids[is.na(ids)] <- 0
           
           ids <- paste0(

From fb0435499317929cc625020aaab5fe5dc5e2028a Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 18 Sep 2024 11:02:32 -0700
Subject: [PATCH 046/166] fix: read cells.parquet or csv.gz instead of zarr.zip

---
 R/preprocessing.R | 80 ++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 46 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 899e6254c..1692097ad 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2360,56 +2360,44 @@ ReadXenium <- function(
         matrix
       },
       'segmentation_method' = {
-        if(!requireNamespace("stars", quietly = TRUE) || !requireNamespace("jsonlite", quietly = TRUE) || !requireNamespace("gmp", quietly = TRUE)) {
-          warning("Reading segmentation_method requires the `stars`, `gmp` and `jsonlite` packages")
-          return(NULL)
+        pcents <- progressor()
+        pcents(
+          message = 'Loading cell metadata',
+          class = 'sticky',
+          amount = 0
+        )
+        
+        col.use <- c(
+          cell_id = 'cell',
+          segmentation_method = 'segmentation_method'
+        )
+        
+        for(option in Filter(function(x) x$req, list(
+          list(
+            filename = "cells.parquet",
+            fn = function(x) as.data.frame(arrow::read_parquet(x, col_select = names(col.use))),
+            req = has_arrow
+          ),
+          list(
+            filename = "cells.csv.gz",
+            fn = function(x) data.table::fread(x, data.table = FALSE, stringsAsFactors = FALSE, select = names(col.use)),
+            req = has_dt
+          ),
+          list(filename = "cells.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
+        ))) {
+          cell_info <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(cell_info, "try-error")) { break }
         }
         
-        if(file.exists(file.path(data.dir, "cells.zarr.zip"))) {
-          pcents <- progressor()
-          pcents(
-            message = 'Loading cell metadata',
-            class = 'sticky',
-            amount = 0
-          )
-          
-          tempdir <- path.expand(tempdir())
-          unzip(file.path(data.dir, "cells.zarr.zip"), exdir = tempdir)
-          zattr <- jsonlite::read_json(file.path(tempdir, '.zattrs'))
-
-          # Segmentation method only available in datasets versioned 6.0+
-          if(zattr$major_version < 6) {
-            return(NULL)
-          }
-
-          which_entry <- which(unlist(zattr$polygon_set_names) == 'cell')
-          
-          indices <- stars::read_mdim(file.path(tempdir, "polygon_sets", which_entry - 1, "cell_index"))$cell_index + 1
-          indices[is.na(indices)] <- 1
-          
-          ids <- stars::read_mdim(file.path(tempdir, "cell_id"))$cell_id
-          ids[is.na(ids)] <- 0
-          
-          ids <- paste0(
-            gsub(' ', 'a', sprintf('%8s', sapply(
-              strsplit(as.character(gmp::as.bigz(ids[1,]), 16), ''),
-              function(id) {
-                rawToChar(as.raw(sapply(id, function(x) {
-                  as.numeric(charToRaw(x)) +
-                    ifelse(is.na(suppressWarnings(as.numeric(x))), 10, 49)
-                })))
-              }
-            ))), '-', ids[2,])
-          
-          method <- stars::read_mdim(file.path(tempdir, 'polygon_sets', which_entry - 1, 'method'))$method + 1
-          method[is.na(method)] <- 1
+        if(!exists('cell_info') || inherits(cell_info, "try-error")) {
+          NULL
+        } else {
+          cell_info <- cell_info[, names(col.use)]
+          colnames(cell_info) <- col.use
           
-          segmentation_method <- unlist(zattr$segmentation_methods)[method]
+          pcents(type = 'finish')
           
-          pcents(type = "finish")
-          data.frame(segmentation_method = segmentation_method, row.names = ids)
-        } else {
-          NULL
+          data.frame(segmentation_method = cell_info$segmentation_method, row.names = cell_info$cell_id)
         }
       },
       'centroids' = {

From efcb50785833cd572c08c014762d2d7e153dd938 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Mon, 23 Sep 2024 16:41:02 -0700
Subject: [PATCH 047/166] fix: parse arrow binary type

---
 R/preprocessing.R | 130 +++++++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 54 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 1692097ad..40a8c6144 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -503,7 +503,7 @@ GetResidual <- function(
 #' tissue
 #' @param to.upper Converts all feature names to upper case. Can be useful when
 #' analyses require comparisons between human and mouse gene names for example.
-#' @param image \code{VisiumV1}/\code{VisiumV2} instance(s) - if a vector is 
+#' @param image \code{VisiumV1}/\code{VisiumV2} instance(s) - if a vector is
 #' passed in it should be co-indexed with \code{`bin.size`}
 #' @param ... Arguments passed to \code{\link{Read10X_h5}}
 #'
@@ -658,7 +658,7 @@ Load10X_Spatial <- function (
 #'
 #' @export
 #' @concept preprocessing
-#' 
+#'
 Read10X_probe_metadata <- function(
   data.dir,
   filename = 'raw_probe_bc_matrix.h5'
@@ -1210,7 +1210,7 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) {
 #' \code{scalefactors_json.json} and \code{tissue_positions_list.csv}
 #' @param image.name PNG file to read in
 #' @param assay Name of associated assay
-#' @param slice Name for the image, used to populate the instance's key 
+#' @param slice Name for the image, used to populate the instance's key
 #' @param filter.matrix Filter spot/feature matrix to only include spots that
 #' have been determined to be over tissue
 #'
@@ -1283,29 +1283,29 @@ Read10X_Image <- function(
 Read10X_Coordinates <- function(filename, filter.matrix) {
   # output columns names
   col.names <- c("barcodes", "tissue", "row", "col", "imagerow", "imagecol")
-  
+
   # if the coordinate mappings are in a parquet file
   if (tools::file_ext(filename) == "parquet") {
     # `arrow` must be installed to read parquet files
     if (!requireNamespace("arrow", quietly = TRUE)) {
       stop("Please install arrow to read parquet files")
     }
-    
+
     # read in coordinates and conver the resulting tibble into a data.frame
     coordinates <- as.data.frame(arrow::read_parquet(filename))
     # normalize column names for consistency with other datatypes
     input.col.names <- c(
-      "barcode", 
-      "in_tissue", 
-      "array_row", 
-      "array_col", 
-      "pxl_row_in_fullres", 
+      "barcode",
+      "in_tissue",
+      "array_row",
+      "array_col",
+      "pxl_row_in_fullres",
       "pxl_col_in_fullres"
     )
     col.map <- stats::setNames(col.names, input.col.names)
     colnames(coordinates) <- ifelse(
-      colnames(coordinates) %in% names(col.map), 
-      col.map[colnames(coordinates)], 
+      colnames(coordinates) %in% names(col.map),
+      col.map[colnames(coordinates)],
       colnames(coordinates)
     )
 
@@ -2337,13 +2337,25 @@ ReadXenium <- function(
   has_arrow <- requireNamespace("arrow", quietly = TRUE)
   has_hdf5r <- requireNamespace("hdf5r", quietly = TRUE)
 
+  binary_to_string <- function(arrow_binary) {
+    if(typeof(arrow_binary) == 'list') {
+      unlist(
+        lapply(
+          arrow_binary, function(x) rawToChar(as.raw(strtoi(x, 16L)))
+        )
+      )
+    } else {
+      arrow_binary
+    }
+  }
+
   data <- sapply(outs, function(otype) {
     switch(
       EXPR = otype,
       'matrix' = {
         pmtx <- progressor()
         pmtx(message = 'Reading counts matrix', class = 'sticky', amount = 0)
-        
+
         for(option in Filter(function(x) x$req, list(
           list(filename = "cell_feature_matrix.h5", fn = Read10X_h5, req = has_hdf5r),
           list(filename = "cell_feature_matrix", fn = Read10X, req = TRUE)
@@ -2351,27 +2363,27 @@ ReadXenium <- function(
           matrix <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
           if(!inherits(matrix, "try-error")) { break }
         }
-        
+
         if(!exists('matrix') || inherits(matrix, "try-error")) {
           stop("Xenium outputs were incomplete: missing cell_feature_matrix")
         }
-        
+
         pmtx(type = "finish")
         matrix
       },
       'segmentation_method' = {
-        pcents <- progressor()
-        pcents(
+        psegs <- progressor()
+        psegs(
           message = 'Loading cell metadata',
           class = 'sticky',
           amount = 0
         )
-        
+
         col.use <- c(
           cell_id = 'cell',
           segmentation_method = 'segmentation_method'
         )
-        
+
         for(option in Filter(function(x) x$req, list(
           list(
             filename = "cells.parquet",
@@ -2385,19 +2397,22 @@ ReadXenium <- function(
           ),
           list(filename = "cells.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
         ))) {
-          cell_info <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
-          if(!inherits(cell_info, "try-error")) { break }
+          cell_seg <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          if(!inherits(cell_seg, "try-error")) { break }
         }
-        
-        if(!exists('cell_info') || inherits(cell_info, "try-error")) {
+
+        if(!exists('cell_seg') || inherits(cell_seg, "try-error") || length(intersect(names(col.use), colnames(cell_seg))) != 2) {
+          warning('cells did not contain a segmentation_method column. Skipping...')
           NULL
         } else {
-          cell_info <- cell_info[, names(col.use)]
-          colnames(cell_info) <- col.use
-          
-          pcents(type = 'finish')
-          
-          data.frame(segmentation_method = cell_info$segmentation_method, row.names = cell_info$cell_id)
+          cell_seg <- cell_seg[, names(col.use)]
+          colnames(cell_seg) <- col.use
+
+          cell_seg$cell <- binary_to_string(cell_seg$cell)
+
+          psegs(type = 'finish')
+
+          data.frame(segmentation_method = cell_seg$segmentation_method, row.names = cell_seg$cell)
         }
       },
       'centroids' = {
@@ -2407,13 +2422,13 @@ ReadXenium <- function(
           class = 'sticky',
           amount = 0
         )
-        
+
         col.use <- c(
           x_centroid = letters[24 + flip.xy],
           y_centroid = letters[25 - flip.xy],
           cell_id = 'cell'
         )
-        
+
         for(option in Filter(function(x) x$req, list(
           list(
             filename = "cells.parquet",
@@ -2430,16 +2445,18 @@ ReadXenium <- function(
           cell_info <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
           if(!inherits(cell_info, "try-error")) { break }
         }
-        
+
         if(!exists('cell_info') || inherits(cell_info, "try-error")) {
           stop("Xenium outputs were incomplete: missing cells")
         }
-        
+
+        cell_info$cell_id <- binary_to_string(cell_info$cell_id)
+
         cell_info <- cell_info[, names(col.use)]
         colnames(cell_info) <- col.use
-        
+
         pcents(type = 'finish')
-        
+
         cell_info
       },
       'segmentations' = {
@@ -2449,7 +2466,7 @@ ReadXenium <- function(
           class = 'sticky',
           amount = 0
         )
-        
+
         for(option in Filter(function(x) x$req, list(
           list(
             filename = "cell_boundaries.parquet",
@@ -2466,19 +2483,21 @@ ReadXenium <- function(
           cell_boundaries_df <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
           if(!inherits(cell_boundaries_df, "try-error")) { break }
         }
-        
+
         if(!exists('cell_boundaries_df') || inherits(cell_boundaries_df, "try-error")) {
           stop("Xenium outputs were incomplete: missing cell_boundaries")
         }
-        
+
         colnames(cell_boundaries_df) <- c(
           'cell',
           letters[24 + flip.xy],
           letters[25 - flip.xy]
         )
-        
+
+        cell_boundaries_df$cell <- binary_to_string(cell_boundaries_df$cell)
+
         psegs(type = "finish")
-        
+
         cell_boundaries_df
       },
       'nucleus_segmentations' = {
@@ -2488,7 +2507,7 @@ ReadXenium <- function(
           class = 'sticky',
           amount = 0
         )
-        
+
         for(option in Filter(function(x) x$req, list(
           list(
             filename = "nucleus_boundaries.parquet",
@@ -2505,19 +2524,20 @@ ReadXenium <- function(
           nucleus_boundaries_df <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
           if(!inherits(nucleus_boundaries_df, "try-error")) { break }
         }
-        
+
         if(!exists('nucleus_boundaries_df') || inherits(nucleus_boundaries_df, "try-error")) {
           stop("Xenium outputs were incomplete: missing nucleus_boundaries")
         }
-        
+
         colnames(nucleus_boundaries_df) <- c(
           'cell',
           letters[24 + flip.xy],
           letters[25 - flip.xy]
         )
-        
+        nucleus_boundaries_df$cell <- binary_to_string(nucleus_boundaries_df$cell)
+
         psegs(type = "finish")
-        
+
         nucleus_boundaries_df
       },
       'microns' = {
@@ -2533,7 +2553,7 @@ ReadXenium <- function(
           y_location = letters[25-flip.xy],
           feature_name = 'gene'
         )
-        
+
         for(option in Filter(function(x) x$req, list(
           list(
             filename = "transcripts.parquet",
@@ -2550,27 +2570,29 @@ ReadXenium <- function(
           transcripts <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
           if(!inherits(transcripts, "try-error")) { break }
         }
-        
+
         if(!exists('transcripts') || inherits(transcripts, "try-error")) {
           hint <- ""
           if(file.exists(file.path(data.dir, "transcripts.parquet"))) {
-            hint <- " Xenium outputs no longer include `transcripts.csv.gz`. Instead, please install `arrow` to read transcripts.parquet"
+            hint <- ". Xenium outputs no longer include `transcripts.csv.gz`. Instead, please install `arrow` to read transcripts.parquet"
           }
-          
-          stop(paste0("Xenium outputs were incomplete: missing transcripts.", hint))
+
+          stop(paste0("Xenium outputs were incomplete: missing transcripts", hint))
         }
-        
+
         transcripts <- transcripts[, names(col.use)]
         colnames(transcripts) <- col.use
-        
+
+        transcripts$gene <- binary_to_string(transcripts$gene)
+
         pmicrons(type = 'finish')
-        
+
         transcripts
       },
       stop("Unknown Xenium input type: ", otype)
     )
   }, simplify = FALSE, USE.NAMES = TRUE)
-  
+
   metadata <- file.path(data.dir, "experiment.xenium")
   if(file.exists(metadata) && requireNamespace("jsonlite", quietly = TRUE)) {
     meta <- jsonlite::read_json(metadata)

From d362ba023767c8b5338140945de8dc8b83eb9753 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Tue, 24 Sep 2024 11:01:32 -0700
Subject: [PATCH 048/166] docs: update vignette

---
 vignettes/seurat5_spatial_vignette_2.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd
index d63f6b4cf..d334aaec3 100644
--- a/vignettes/seurat5_spatial_vignette_2.Rmd
+++ b/vignettes/seurat5_spatial_vignette_2.Rmd
@@ -216,7 +216,7 @@ ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], c
 
 # Human Lung: 10x Genomics Xenium In Situ
 
-This dataset is a preview of the Xenium multimodal cell segmentation solution using a development version of the assay user guide and analysis software. It uses the [Xenium Human Multi-Tissue and Cancer Panel](https://www.10xgenomics.com/support/in-situ-gene-expression/documentation/steps/panel-design/pre-designed-xenium-gene-expression-panels) (377 genes) which was pre-designed by 10x Genomics. In this vignette, we will demonstrate how to load Xenium data for analysis and visualization using Seurat and, in particular, how to parse and visualize cell metadata. Note that this vignette requires the use of some optional dependencies in Seurat, namely `stars`, `jsonlite` and `gmp` in order to read data from `.zarr` files.
+This dataset is a preview of the Xenium multimodal cell segmentation solution using a development version of the assay user guide and analysis software. It uses the [Xenium Human Multi-Tissue and Cancer Panel](https://www.10xgenomics.com/support/in-situ-gene-expression/documentation/steps/panel-design/pre-designed-xenium-gene-expression-panels) (377 genes) which was pre-designed by 10x Genomics. In this vignette, we will demonstrate how to load Xenium data for analysis and visualization using Seurat and, in particular, how to parse and visualize cell metadata.
 
 This uses the full Xenium output bundle available from the [FFPE Human Lung Cancer with Xenium Multimodal Cell Segmentation Preview Data](https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard) page, which can be downloaded as described below (note that this file is \~7 GB).
 

From 0c536c79ae9d700a14558a32259aeb86beb8b140 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Wed, 25 Sep 2024 15:47:49 -0700
Subject: [PATCH 049/166] doc: update NEWS and roxygenize

---
 DESCRIPTION            |  2 +-
 NAMESPACE              |  1 +
 NEWS.md                |  6 ++++++
 man/Load10X_Spatial.Rd |  2 +-
 man/ReadXenium.Rd      | 37 ++++++++++++++++++++++++++++++-------
 5 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ddaf72db8..c56a92a1d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -114,7 +114,7 @@ Collate:
     'sketching.R'
     'tree.R'
     'utilities.R'
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 Suggests:
     ape,
diff --git a/NAMESPACE b/NAMESPACE
index 005325e25..3f4704f61 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -546,6 +546,7 @@ importFrom(SeuratObject,CreateAssayObject)
 importFrom(SeuratObject,CreateCentroids)
 importFrom(SeuratObject,CreateDimReducObject)
 importFrom(SeuratObject,CreateFOV)
+importFrom(SeuratObject,CreateMolecules)
 importFrom(SeuratObject,CreateSegmentation)
 importFrom(SeuratObject,CreateSeuratObject)
 importFrom(SeuratObject,DefaultAssay)
diff --git a/NEWS.md b/NEWS.md
index d3b17bb2e..2da9bba79 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,12 @@
 # Unreleased
 
 ## Changes
+- Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`
+- Added ability to load Xenium nucleus segmentation masks
+- Updated `LoadXenium` to also read some run metadata (run start time, preservation method, panel used, organism, tissue type, instrument software version and stain kit used) into `misc` slot
+- Updated `ReadXenium` to load cell_feature_matrix.h5 when present in favor of the MEX format files
+- Added ability to read Xenium `segmentation_method` directly into `meta.data`
+- Updated `ReadXenium` to load .parquet files using `arrow` instead of .csv.gz files to support XOA 3.0
 - Fixed `LoadXenium` to accommodate datasets without "Blank Codeword" or "Unassigned Codeword" matrices
 - Fixed `ReadXenium` to properly parse multiple molecular outputs at once ([#8265](https://github.com/satijalab/seurat/issues/8265))
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd
index dd7a031b0..9bb2bbb5d 100644
--- a/man/Load10X_Spatial.Rd
+++ b/man/Load10X_Spatial.Rd
@@ -34,7 +34,7 @@ tissue}
 \item{to.upper}{Converts all feature names to upper case. Can be useful when
 analyses require comparisons between human and mouse gene names for example.}
 
-\item{image}{\code{VisiumV1}/\code{VisiumV2} instance(s) - if a vector is 
+\item{image}{\code{VisiumV1}/\code{VisiumV2} instance(s) - if a vector is
 passed in it should be co-indexed with \code{`bin.size`}}
 
 \item{...}{Arguments passed to \code{\link{Read10X_h5}}}
diff --git a/man/ReadXenium.Rd b/man/ReadXenium.Rd
index df86aa864..dbe85e398 100644
--- a/man/ReadXenium.Rd
+++ b/man/ReadXenium.Rd
@@ -5,13 +5,23 @@
 \alias{ReadXenium}
 \title{Read and Load 10x Genomics Xenium in-situ data}
 \usage{
-LoadXenium(data.dir, fov = "fov", assay = "Xenium")
+LoadXenium(
+  data.dir,
+  fov = "fov",
+  assay = "Xenium",
+  mols.qv.threshold = 20,
+  cell.centroids = TRUE,
+  molecule.coordinates = TRUE,
+  segmentations = NULL,
+  flip.xy = FALSE
+)
 
 ReadXenium(
   data.dir,
-  outs = c("matrix", "microns"),
+  outs = c("segmentation_method", "matrix", "microns"),
   type = "centroids",
-  mols.qv.threshold = 20
+  mols.qv.threshold = 20,
+  flip.xy = F
 )
 }
 \arguments{
@@ -22,10 +32,26 @@ default filenames}
 
 \item{assay}{Assay name}
 
+\item{mols.qv.threshold}{Remove transcript molecules with
+a QV less than this threshold. QV >= 20 is the standard threshold
+used to construct the cell x gene count matrix.}
+
+\item{cell.centroids}{Whether or not to load cell centroids}
+
+\item{molecule.coordinates}{Whether or not to load molecule pixel coordinates}
+
+\item{segmentations}{One of "cell", "nucleus" or NULL (to load either cell
+segmentations, nucleus segmentations or neither)}
+
+\item{flip.xy}{Whether or not to flip the x/y coordinates of the Xenium outputs
+to match what is displayed in Xenium Explorer, or fit on your screen better.}
+
 \item{outs}{Types of molecular outputs to read; choose one or more of:
 \itemize{
  \item \dQuote{matrix}: the counts matrix
  \item \dQuote{microns}: molecule coordinates
+ \item \dQuote{segmentation_method}: cell segmentation method (for runs which
+ use multi-modal segmentation)
 }}
 
 \item{type}{Type of cell spatial coordinate matrices to read; choose one
@@ -33,11 +59,8 @@ or more of:
 \itemize{
  \item \dQuote{centroids}: cell centroids in pixel coordinate space
  \item \dQuote{segmentations}: cell segmentations in pixel coordinate space
+ \item \dQuote{nucleus_segmentations}: nucleus segmentations in pixel coordinate space
 }}
-
-\item{mols.qv.threshold}{Remove transcript molecules with
-a QV less than this threshold. QV >= 20 is the standard threshold
-used to construct the cell x gene count matrix.}
 }
 \value{
 \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object

From db8bd41ea83f53c155a8376a098cdac94d94b5e5 Mon Sep 17 00:00:00 2001
From: Jordan Sicherman <jordan.sicherman@10xgenomics.com>
Date: Sun, 29 Sep 2024 15:40:29 -0700
Subject: [PATCH 050/166] fix: better error handling

---
 DESCRIPTION       | 4 ++--
 R/preprocessing.R | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e1e441a1f..f3a885a91 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Seurat
-Version: 5.1.0.9005
-Date: 2024-09-12
+Version: 5.1.0.9006
+Date: 2024-09-29
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
diff --git a/R/preprocessing.R b/R/preprocessing.R
index 40a8c6144..8d7b01eef 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2397,12 +2397,12 @@ ReadXenium <- function(
           ),
           list(filename = "cells.csv.gz", fn = function(x) read.csv(x, stringsAsFactors = FALSE), req = TRUE)
         ))) {
-          cell_seg <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))))
+          cell_seg <- try(suppressWarnings(option$fn(file.path(data.dir, option$filename))), silent = TRUE)
           if(!inherits(cell_seg, "try-error")) { break }
         }
 
         if(!exists('cell_seg') || inherits(cell_seg, "try-error") || length(intersect(names(col.use), colnames(cell_seg))) != 2) {
-          warning('cells did not contain a segmentation_method column. Skipping...')
+          warning('cells did not contain a segmentation_method column. Skipping...', call. = FALSE, immediate. = TRUE)
           NULL
         } else {
           cell_seg <- cell_seg[, names(col.use)]

From 5959407309ff3d6a276a0b46b495be1815e3c9b3 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 17:48:52 -0500
Subject: [PATCH 051/166] Add placeholder workflow, "Integration Checks"

Add .github/workflows/integration_checks.yaml
---
 .github/workflows/integration_checks.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 .github/workflows/integration_checks.yaml

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
new file mode 100644
index 000000000..030761436
--- /dev/null
+++ b/.github/workflows/integration_checks.yaml
@@ -0,0 +1,15 @@
+name: Integration Checks
+
+on:
+  push:
+    branches: 
+    - develop
+  pull_request:
+    branches: 
+    - develop
+
+jobs:
+  check-package:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "This is a placeholder workflow."

From d669d9dd97986e6ca217608d3458c9ab727d8213 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 08:57:08 -0500
Subject: [PATCH 052/166] Drop files for old CI checks

---
 .travis.yml         | 55 -----------------------------------------
 appveyor.yml        | 60 ---------------------------------------------
 azure-pipelines.yml | 59 --------------------------------------------
 travis_setup.sh     | 28 ---------------------
 4 files changed, 202 deletions(-)
 delete mode 100644 .travis.yml
 delete mode 100644 appveyor.yml
 delete mode 100644 azure-pipelines.yml
 delete mode 100644 travis_setup.sh

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 38fa4c0e2..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
-
-language: r
-sudo: required
-cache: packages
-
-os:
-  - linux
-  - osx
-
-r:
-  - release
-  - devel
-  
-matrix:
-  exclude:
-    - r: devel
-      os: osx
-
-env:
-  global:
-    - _R_CHECK_FORCE_SUGGESTS_=FALSE
-    - ASAN="-fsanitize=address -fno-omit-frame-pointer"
-    - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
-    - HDF5_VERSION=1.8.17
-    - HDF5_RELEASE_URL="https://support.hdfgroup.org/ftp/HDF5/releases"
-
-before_install:
-  - chmod +x travis_setup.sh
-  - ./travis_setup.sh
-
-addons:
-  apt:
-    packages:
-      - subversion
-      - autoconf
-      - build-essential
-      - libtool
-      - libmagick++-dev
-  homebrew:
-    packages:
-      - libgit2
-
-#bioc_packages:
-#  - GenomeInfoDbData
-#  - DESeq2
-#  - MAST
-#  - S4Vectors
-#  - SummarizedExperiment
-#  - SingleCellExperiment
-
-r_github_packages:
-  - mojaveazure/loomR
-
-warnings_are_errors: false
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index 9c400d215..000000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-# DO NOT CHANGE the "init" and "install" sections below
-
-# Download script file from GitHub
-init:
-  ps: |
-        $ErrorActionPreference = "Stop"
-        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
-        Import-Module '..\appveyor-tool.ps1'
-install:
-  - "%PYTHON%\\python.exe -m pip install wheel"
-  - "%PYTHON%\\python.exe -m pip install phate"
-  - ps: Bootstrap
-
-# Adapt as necessary starting from here
-
-environment:
-  global:
-    R_ARCH: x64
-    USE_RTOOLS: true
-    BIOC_USE_DEVEL: "FALSE"
-    _R_CHECK_FORCE_SUGGESTS_: false
-    PYTHON: "C:\\Python36-x64"
-    RETICULATE_PYTHON: "C:\\Python36-x64"
-    CRAN: "https://cloud.r-project.org"
-
-build_script:
-  - travis-tool.sh install_deps
-  - travis-tool.sh r_binary_install curl
-  - travis-tool.sh bioc_install GenomeInfoDbData
-  - travis-tool.sh bioc_install DESeq2
-  - travis-tool.sh bioc_install MAST
-  - travis-tool.sh bioc_install S4Vectors
-  - travis-tool.sh bioc_install SummarizedExperiment
-  - travis-tool.sh bioc_install SingleCellExperiment
-
-test_script:
-  - travis-tool.sh run_tests
-
-on_failure:
-  - 7z a failure.zip *.Rcheck\*
-  - appveyor PushArtifact failure.zip
-
-artifacts:
-  - path: '*.Rcheck\**\*.log'
-    name: Logs
-
-  - path: '*.Rcheck\**\*.out'
-    name: Logs
-
-  - path: '*.Rcheck\**\*.fail'
-    name: Logs
-
-  - path: '*.Rcheck\**\*.Rout'
-    name: Logs
-
-  - path: '\*_*.tar.gz'
-    name: Bits
-
-  - path: '\*_*.zip'
-    name: Bits
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 134640395..000000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Inspired by Jim Hester's Azure Pipelines tests
-# https://github.com/jimhester/azuretest
-
-trigger:
-- master
-- develop
-
-pr:
-- master
-- develop
-- release/*
-
-jobs:
-  - job: 'Build_all_vignettes'
-    timeoutInMinutes: 4320
-    pool: Pool2
-    variables:
-      R_LIBS_USER: '$(Agent.BuildDirectory)/R/library'
-    container: satijalab/seurat-pkgdown:develop
-    steps:
-      - script: |
-            set -e
-            echo "R_LIBS =" "$(Rscript -e "cat(Sys.getenv('R_LIBS_USER')[3])")" > .Renviron
-            Rscript --default-packages=stats,graphics,grDevices,utils,datasets,base,methods -e "install.packages('.', type = 'source', repos = NULL, dependencies = FALSE, lib = Sys.getenv('R_LIBS_USER'))"
-            mv /data/* data/
-            mkdir output
-            mkdir output/images
-            mkdir output/timings
-            Rscript -e "pkgdown::init_site()"
-            Rscript -e "pkgdown::build_article('pbmc3k_tutorial')"
-            ls vignettes/*.Rmd | xargs -n1 basename | grep -v 'pbmc3k_tutorial.Rmd' | cut -f 1 -d '.' | parallel -j4 "Rscript -e 'pkgdown::build_article(\"{}\")'"
-            Rscript -e "pkgdown::build_site(lazy = TRUE)"
-            cp vignettes/assets/* docs/articles/assets/
-        displayName: 'Build pkgdown site'
-      - script: |
-            find ./docs/ -name '*.png' -print0 | xargs -0 -P8 -L1 pngquant --ext .png --force --speed 1
-            html-minifier --input-dir ./docs/ --output-dir ./docs/ --file-ext html --collapse-whitespace --remove-comments --remove-optional-tags --remove-redundant-attributes --remove-script-type-attributes --remove-tag-whitespace --use-short-doctype --minify-css true --minify-js true 
-        displayName: 'Minify'
-      - task: CopyFiles@2
-        inputs:
-          sourceFolder: '$(Build.SourcesDirectory)'
-          contents: '**/docs/**'
-          TargetFolder: '$(Build.ArtifactStagingDirectory)/docs'
-      - task: CopyFiles@2
-        inputs:
-          sourceFolder: '$(Build.SourcesDirectory)'
-          contents: '**/output/timings/**'
-          TargetFolder: '$(Build.ArtifactStagingDirectory)/timings'
-      - task: PublishBuildArtifacts@1
-        inputs:
-          pathtoPublish: '$(Build.ArtifactStagingDirectory)/docs/docs/'
-          artifactName: docs
-      - task: PublishBuildArtifacts@1
-        inputs:
-          pathtoPublish: '$(Build.ArtifactStagingDirectory)/timings/output/timings/'
-          artifactName: timings
-      - script: |
-          netlify deploy --prod --dir '$(Build.ArtifactStagingDirectory)/docs/docs/' --site '$(site_id)' --auth '$(auth_token)'
-        displayName: 'Netlify Preview Deploy'
diff --git a/travis_setup.sh b/travis_setup.sh
deleted file mode 100644
index 345c5d486..000000000
--- a/travis_setup.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-if [ "$TRAVIS_OS_NAME" != "osx" ]; then #
-  cd ..
-  wget "$HDF5_RELEASE_URL/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz"
-  tar -xzf "hdf5-$HDF5_VERSION.tar.gz"
-  cd "hdf5-$HDF5_VERSION"
-  CFLAGS="-w" ./configure --quiet --prefix=/usr/local
-  sudo CFLAGS="-w" make --quiet install
-  cd ../seurat
-fi
-
-# # install python
-# if [[ $TRAVIS_OS_NAME == "linux" ]]; then
-#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
-# elif [[ $TRAVIS_OS_NAME == "osx" ]]; then
-#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh
-# fi
-
-# bash miniconda.sh -b -p $HOME/miniconda
-# export PATH="$HOME/miniconda/bin:$PATH"
-# export RETICULATE_PYTHON="$HOME/miniconda/bin/python"
-# hash -r
-# conda config --set always_yes yes --set changeps1 no
-# conda update -q conda
-# conda info -a
-# pip install --upgrade pip
-# pip install phate

From 230a9f1d687c76b92142cf5c4f663b2c297579b2 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 08:57:59 -0500
Subject: [PATCH 053/166] Update .Rbuildignore

---
 .Rbuildignore | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 875e4cd2b..e2d47f235 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -4,11 +4,7 @@
 ^\.Rproj\.user$
 ^.*\.old$
 .git
-.travis.yml
-appveyor.yml
-azure-pipelines.yml
 cran-comments.md
-travis_setup.sh
 CODE_OF_CONDUCT.md
 ^_pkgdown\.yaml$
 ^docs$

From 5e5bb51214965c0cbc41f658057dfa346e9c8252 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 08:58:40 -0500
Subject: [PATCH 054/166] Drop .github/workflows/R_CMD_check.yaml

---
 .github/workflows/R_CMD_check.yaml | 41 ------------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 .github/workflows/R_CMD_check.yaml

diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml
deleted file mode 100644
index b7b64ab33..000000000
--- a/.github/workflows/R_CMD_check.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-on:
-    push:
-        branches:
-            - master
-            - develop
-    pull_request:
-
-env:
-  GITHUB_PAT: ${{ secrets.PAT }}
-
-jobs:
-    r-cmd-check:
-
-        if: "!contains(github.event.head_commit.message, 'ci-skip')"
-
-        name: R CMD check
-        container:
-            image: satijalab/seurat:develop
-        runs-on: [ self-hosted ]
-        
-        steps:
-            - uses: actions/checkout@v3
-            - name: Remove vignettes dir
-              run: rm -rf 'vignettes/'
-              shell: bash
-
-            - name: Install additional dependencies
-              run: |
-                  Rscript -e "remotes::install_github('mojaveazure/seurat-object', ref = 'feat/CalN_generic')"
-                  Rscript -e "remotes::install_github('bnprks/BPCells')"
-
-            - name: Check
-              run:  devtools::check(args = "--no-manual", error_on = "warning", check_dir = "check", force_suggests = FALSE)
-              shell: Rscript {0}
-
-#            - name: Upload check results
-#              if: failure()
-#              uses: actions/upload-artifact@master
-#              with:
-#                  name: results
-#                  path: check

From 393eb3cfd848a6a6b672b7cb9ee0de2436a63bda Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 09:02:03 -0500
Subject: [PATCH 055/166] Fork integration-checks.yaml from seurat-object

---
 .github/workflows/integration_checks.yaml | 39 +++++++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index 030761436..297acdb9c 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -1,15 +1,48 @@
 name: Integration Checks
 
+# Because `main` is a protected branch this workflow is triggered when a PR 
+# is opened/updated and again when it is merged.
 on:
   push:
     branches: 
-    - develop
+    - main
   pull_request:
     branches: 
-    - develop
+    - main
 
 jobs:
   check-package:
     runs-on: ubuntu-latest
+    
+    # Use the `satijalab/seurat-ci` Docker image as the runner environment. 
+    # This image is pre-configured with everything required for running
+    # integration checks, for more details, see
+    # https://hub.docker.com/repository/docker/satijalab/seurat-ci/general.
+    container:
+      image: satijalab/seurat-ci:latest
+
     steps:
-      - run: echo "This is a placeholder workflow."
+      # Pull the latest changes from the repository down to the runner.
+      - name: Checkout
+        uses: actions/checkout@v4
+      
+      # Install the package and all its dependencies using scripts from 
+      # `littler`, see https://eddelbuettel.github.io/littler/ for details. 
+      # `Seurat` is listed under "Enhances" so it also needs to be installed. 
+      - name: Install Dependencies
+        run: installDeps.r -s && install.r Seurat
+      
+      # Run CRAN checks, if any ERRORs or WARNINGs are raised the check fails.
+      - name: Run Checks
+        run: rcmdcheck::rcmdcheck(args = "--as-cran", error_on="warning")
+        shell: Rscript {0}
+        continue-on-error: true
+      
+      # Build the `pkgdown` site, if any errors are raised the check fails.
+      - name: Build Website
+        run: | 
+          pkgdown::build_site_github_pages(
+            new_process = FALSE, 
+            install = FALSE
+          )
+        shell: Rscript {0}

From dadba04460a678a4ca73932fbd37c9bda09a0991 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 09:04:38 -0500
Subject: [PATCH 056/166] Update trigger to push or PR for develop

---
 .github/workflows/integration_checks.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index 297acdb9c..3883c8c47 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -1,14 +1,14 @@
 name: Integration Checks
 
-# Because `main` is a protected branch this workflow is triggered when a PR 
+# Because `develop` is a protected branch this workflow is triggered when a PR 
 # is opened/updated and again when it is merged.
 on:
   push:
     branches: 
-    - main
+    - develop
   pull_request:
     branches: 
-    - main
+    - develop
 
 jobs:
   check-package:

From e1959814d5490c9210f78f830b3838707bdf9c33 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 09:04:52 -0500
Subject: [PATCH 057/166] Drop Seurat CRAN install

---
 .github/workflows/integration_checks.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index 3883c8c47..38c4553bf 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -28,9 +28,8 @@ jobs:
       
       # Install the package and all its dependencies using scripts from 
       # `littler`, see https://eddelbuettel.github.io/littler/ for details. 
-      # `Seurat` is listed under "Enhances" so it also needs to be installed. 
       - name: Install Dependencies
-        run: installDeps.r -s && install.r Seurat
+        run: installDeps.r -s
       
       # Run CRAN checks, if any ERRORs or WARNINGs are raised the check fails.
       - name: Run Checks

From 592aa5caf568938dde34f2a0777aec0327194e11 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 09:05:07 -0500
Subject: [PATCH 058/166] Drop "Build Website" step

---
 .github/workflows/integration_checks.yaml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index 38c4553bf..a0eab40c2 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -36,12 +36,3 @@ jobs:
         run: rcmdcheck::rcmdcheck(args = "--as-cran", error_on="warning")
         shell: Rscript {0}
         continue-on-error: true
-      
-      # Build the `pkgdown` site, if any errors are raised the check fails.
-      - name: Build Website
-        run: | 
-          pkgdown::build_site_github_pages(
-            new_process = FALSE, 
-            install = FALSE
-          )
-        shell: Rscript {0}

From 747cffd7b157e4864dc05cb1a942285a3c3bbaf8 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 10:23:41 -0500
Subject: [PATCH 059/166] Add "Run Tests" step

---
 .github/workflows/integration_checks.yaml | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index a0eab40c2..c1601892b 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -31,8 +31,20 @@ jobs:
       - name: Install Dependencies
         run: installDeps.r -s
       
+      # A significant number of `Seurat`'s tests are skipped during CRAN checks
+      # so we'll run the tests separately. 
+      - name: Run Tests
+        run: testthat::test_local()
+        shell: Rscript {0}
+        continue-on-error: true
+      
       # Run CRAN checks, if any ERRORs or WARNINGs are raised the check fails.
-      - name: Run Checks
-        run: rcmdcheck::rcmdcheck(args = "--as-cran", error_on="warning")
+      # Avoid re-running tests redundantly. 
+      - name: Run CRAN Checks (no tests)
+        run: |
+          rcmdcheck::rcmdcheck(
+            args = c("--as-cran", "--no-tests"), 
+            error_on="warning"
+          )
         shell: Rscript {0}
         continue-on-error: true

From e194df3b8266eec456260f091b206408ef649a98 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 15:18:26 -0500
Subject: [PATCH 060/166] Add glmGamPoi to suggested dependencies

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index f3a885a91..d5a81113c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -120,6 +120,7 @@ Suggests:
     ape,
     arrow,
     BPCells,
+    glmGamPoi,
     rsvd,
     testthat,
     hdf5r,

From fc975700c0fdf6f1482fe8a99b47fa4194c901b9 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 15:20:56 -0500
Subject: [PATCH 061/166] Add sf to suggested dependencies

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d5a81113c..973621216 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -147,4 +147,5 @@ Suggests:
     R.utils,
     presto,
     DelayedArray,
-    harmony
+    harmony,
+    sf (>= 1.0.0)

From 1d9b32020731e9a26d1d440701b52a6a06ddac8a Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 16:16:00 -0500
Subject: [PATCH 062/166] Sort "Suggests" field in DESCRIPTION

---
 DESCRIPTION | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 973621216..f6659a1ed 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -119,33 +119,33 @@ Encoding: UTF-8
 Suggests:
     ape,
     arrow,
+    Biobase,
+    BiocGenerics,
     BPCells,
-    glmGamPoi,
-    rsvd,
-    testthat,
-    hdf5r,
-    S4Vectors,
-    SummarizedExperiment,
-    SingleCellExperiment,
-    MAST,
+    data.table,
     DESeq2,
-    BiocGenerics,
+    DelayedArray,
+    enrichR,
     GenomicRanges,
     GenomeInfoDb,
+    glmGamPoi,
+    ggrastr,
+    harmony,
+    hdf5r,
     IRanges,
-    rtracklayer,
-    Rfast2,
-    monocle,
-    Biobase,
-    VGAM,
     limma,
+    MAST,
     metap,
-    enrichR,
     mixtools,
-    ggrastr,
-    data.table,
-    R.utils,
+    monocle,
     presto,
-    DelayedArray,
-    harmony,
-    sf (>= 1.0.0)
+    rsvd,
+    R.utils,
+    Rfast2,
+    rtracklayer,
+    S4Vectors,
+    sf (>= 1.0.0),
+    SingleCellExperiment,
+    SummarizedExperiment,
+    testthat,
+    VGAM

From 8b4c2d4ccc9bd1de0def7693418c709ae33f1f6b Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 16:17:44 -0500
Subject: [PATCH 063/166] Re-order DESCRIPTION headers

---
 DESCRIPTION | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f6659a1ed..ac55f5f9a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,6 +29,7 @@ Authors@R: c(
   person(given = "Shiwei", family = "Zheng", email = "szheng@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-6682-6743")),
   person("Satija Lab and Collaborators", role = "fnd")
   )
+License: MIT + file LICENSE
 URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat
 BugReports: https://github.com/satijalab/seurat/issues
 Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev
@@ -90,32 +91,6 @@ Imports:
     tools,
     utils,
     uwot (>= 0.1.10)
-LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress
-License: MIT + file LICENSE
-LazyData: true
-Collate:
-    'RcppExports.R'
-    'reexports.R'
-    'generics.R'
-    'clustering.R'
-    'visualization.R'
-    'convenience.R'
-    'data.R'
-    'differential_expression.R'
-    'dimensional_reduction.R'
-    'integration.R'
-    'zzz.R'
-    'integration5.R'
-    'mixscape.R'
-    'objects.R'
-    'preprocessing.R'
-    'preprocessing5.R'
-    'roxygen.R'
-    'sketching.R'
-    'tree.R'
-    'utilities.R'
-RoxygenNote: 7.3.2
-Encoding: UTF-8
 Suggests:
     ape,
     arrow,
@@ -149,3 +124,28 @@ Suggests:
     SummarizedExperiment,
     testthat,
     VGAM
+LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.3.2
+Collate:
+    'RcppExports.R'
+    'reexports.R'
+    'generics.R'
+    'clustering.R'
+    'visualization.R'
+    'convenience.R'
+    'data.R'
+    'differential_expression.R'
+    'dimensional_reduction.R'
+    'integration.R'
+    'zzz.R'
+    'integration5.R'
+    'mixscape.R'
+    'objects.R'
+    'preprocessing.R'
+    'preprocessing5.R'
+    'roxygen.R'
+    'sketching.R'
+    'tree.R'
+    'utilities.R'

From 6cb72d5473da02c2481ffbaa506204fa6385acad Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 16:29:58 -0500
Subject: [PATCH 064/166] Drop "Date" field from DESCRIPTION

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ac55f5f9a..9405565bf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,5 @@
 Package: Seurat
 Version: 5.1.0.9006
-Date: 2024-09-29
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From e3fcfdc1bc1dcd7c9226f44b12f2e1b96a9dde1d Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 16:30:13 -0500
Subject: [PATCH 065/166] Add "BuildManual" field to description

Set value to "true"
---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9405565bf..aaf6bc855 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -124,6 +124,7 @@ Suggests:
     testthat,
     VGAM
 LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress
+BuildManual: true
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.2

From 52b1ba7772731ffa34e4bc93846fc204f7d9be3e Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 16:30:26 -0500
Subject: [PATCH 066/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index aaf6bc855..e18541d10 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9006
+Version: 5.1.0.9007
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 5b8e8682dc708e091421784c3e80fa6094176081 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 19:09:44 -0500
Subject: [PATCH 067/166] Replace "continue-on-error" with "if: always()"

---
 .github/workflows/integration_checks.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index c1601892b..eab93af33 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -36,15 +36,15 @@ jobs:
       - name: Run Tests
         run: testthat::test_local()
         shell: Rscript {0}
-        continue-on-error: true
       
       # Run CRAN checks, if any ERRORs or WARNINGs are raised the check fails.
       # Avoid re-running tests redundantly. 
       - name: Run CRAN Checks (no tests)
+        # Run this step even if the previous one failed.
+        if: always()
         run: |
           rcmdcheck::rcmdcheck(
             args = c("--as-cran", "--no-tests"), 
             error_on="warning"
           )
         shell: Rscript {0}
-        continue-on-error: true

From 4767dfc10b570c78fe73c1e00b6fb82ed9837bf1 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 9 Dec 2024 19:10:50 -0500
Subject: [PATCH 068/166] Run CRAN checks before tests in integration checks

---
 .github/workflows/integration_checks.yaml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index eab93af33..0d725d8d0 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -31,20 +31,20 @@ jobs:
       - name: Install Dependencies
         run: installDeps.r -s
       
-      # A significant number of `Seurat`'s tests are skipped during CRAN checks
-      # so we'll run the tests separately. 
-      - name: Run Tests
-        run: testthat::test_local()
-        shell: Rscript {0}
-      
       # Run CRAN checks, if any ERRORs or WARNINGs are raised the check fails.
-      # Avoid re-running tests redundantly. 
+      # Certain tests are skipped when running as CRAN—skip all tests so they
+      # can be run together in a subsequent step.
       - name: Run CRAN Checks (no tests)
-        # Run this step even if the previous one failed.
-        if: always()
         run: |
           rcmdcheck::rcmdcheck(
             args = c("--as-cran", "--no-tests"), 
             error_on="warning"
           )
         shell: Rscript {0}
+            
+      # Because tests weren't included in CRAN checks, run them here.
+      - name: Run Tests
+        # Run this step even if the previous one failed.
+        if: always()
+        run: testthat::test_local()
+        shell: Rscript {0}

From 35fa0c0acda8eefb550cba9e58cfdc347ad68819 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 11 Dec 2024 08:14:50 -0500
Subject: [PATCH 069/166] Revert "Drop files for old CI checks"

This reverts commit d669d9dd97986e6ca217608d3458c9ab727d8213.
---
 .travis.yml         | 55 +++++++++++++++++++++++++++++++++++++++++
 appveyor.yml        | 60 +++++++++++++++++++++++++++++++++++++++++++++
 azure-pipelines.yml | 59 ++++++++++++++++++++++++++++++++++++++++++++
 travis_setup.sh     | 28 +++++++++++++++++++++
 4 files changed, 202 insertions(+)
 create mode 100644 .travis.yml
 create mode 100644 appveyor.yml
 create mode 100644 azure-pipelines.yml
 create mode 100644 travis_setup.sh

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..38fa4c0e2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,55 @@
+# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
+
+language: r
+sudo: required
+cache: packages
+
+os:
+  - linux
+  - osx
+
+r:
+  - release
+  - devel
+  
+matrix:
+  exclude:
+    - r: devel
+      os: osx
+
+env:
+  global:
+    - _R_CHECK_FORCE_SUGGESTS_=FALSE
+    - ASAN="-fsanitize=address -fno-omit-frame-pointer"
+    - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
+    - HDF5_VERSION=1.8.17
+    - HDF5_RELEASE_URL="https://support.hdfgroup.org/ftp/HDF5/releases"
+
+before_install:
+  - chmod +x travis_setup.sh
+  - ./travis_setup.sh
+
+addons:
+  apt:
+    packages:
+      - subversion
+      - autoconf
+      - build-essential
+      - libtool
+      - libmagick++-dev
+  homebrew:
+    packages:
+      - libgit2
+
+#bioc_packages:
+#  - GenomeInfoDbData
+#  - DESeq2
+#  - MAST
+#  - S4Vectors
+#  - SummarizedExperiment
+#  - SingleCellExperiment
+
+r_github_packages:
+  - mojaveazure/loomR
+
+warnings_are_errors: false
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 000000000..9c400d215
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,60 @@
+# DO NOT CHANGE the "init" and "install" sections below
+
+# Download script file from GitHub
+init:
+  ps: |
+        $ErrorActionPreference = "Stop"
+        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
+        Import-Module '..\appveyor-tool.ps1'
+install:
+  - "%PYTHON%\\python.exe -m pip install wheel"
+  - "%PYTHON%\\python.exe -m pip install phate"
+  - ps: Bootstrap
+
+# Adapt as necessary starting from here
+
+environment:
+  global:
+    R_ARCH: x64
+    USE_RTOOLS: true
+    BIOC_USE_DEVEL: "FALSE"
+    _R_CHECK_FORCE_SUGGESTS_: false
+    PYTHON: "C:\\Python36-x64"
+    RETICULATE_PYTHON: "C:\\Python36-x64"
+    CRAN: "https://cloud.r-project.org"
+
+build_script:
+  - travis-tool.sh install_deps
+  - travis-tool.sh r_binary_install curl
+  - travis-tool.sh bioc_install GenomeInfoDbData
+  - travis-tool.sh bioc_install DESeq2
+  - travis-tool.sh bioc_install MAST
+  - travis-tool.sh bioc_install S4Vectors
+  - travis-tool.sh bioc_install SummarizedExperiment
+  - travis-tool.sh bioc_install SingleCellExperiment
+
+test_script:
+  - travis-tool.sh run_tests
+
+on_failure:
+  - 7z a failure.zip *.Rcheck\*
+  - appveyor PushArtifact failure.zip
+
+artifacts:
+  - path: '*.Rcheck\**\*.log'
+    name: Logs
+
+  - path: '*.Rcheck\**\*.out'
+    name: Logs
+
+  - path: '*.Rcheck\**\*.fail'
+    name: Logs
+
+  - path: '*.Rcheck\**\*.Rout'
+    name: Logs
+
+  - path: '\*_*.tar.gz'
+    name: Bits
+
+  - path: '\*_*.zip'
+    name: Bits
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 000000000..134640395
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,59 @@
+# Inspired by Jim Hester's Azure Pipelines tests
+# https://github.com/jimhester/azuretest
+
+trigger:
+- master
+- develop
+
+pr:
+- master
+- develop
+- release/*
+
+jobs:
+  - job: 'Build_all_vignettes'
+    timeoutInMinutes: 4320
+    pool: Pool2
+    variables:
+      R_LIBS_USER: '$(Agent.BuildDirectory)/R/library'
+    container: satijalab/seurat-pkgdown:develop
+    steps:
+      - script: |
+            set -e
+            echo "R_LIBS =" "$(Rscript -e "cat(Sys.getenv('R_LIBS_USER')[3])")" > .Renviron
+            Rscript --default-packages=stats,graphics,grDevices,utils,datasets,base,methods -e "install.packages('.', type = 'source', repos = NULL, dependencies = FALSE, lib = Sys.getenv('R_LIBS_USER'))"
+            mv /data/* data/
+            mkdir output
+            mkdir output/images
+            mkdir output/timings
+            Rscript -e "pkgdown::init_site()"
+            Rscript -e "pkgdown::build_article('pbmc3k_tutorial')"
+            ls vignettes/*.Rmd | xargs -n1 basename | grep -v 'pbmc3k_tutorial.Rmd' | cut -f 1 -d '.' | parallel -j4 "Rscript -e 'pkgdown::build_article(\"{}\")'"
+            Rscript -e "pkgdown::build_site(lazy = TRUE)"
+            cp vignettes/assets/* docs/articles/assets/
+        displayName: 'Build pkgdown site'
+      - script: |
+            find ./docs/ -name '*.png' -print0 | xargs -0 -P8 -L1 pngquant --ext .png --force --speed 1
+            html-minifier --input-dir ./docs/ --output-dir ./docs/ --file-ext html --collapse-whitespace --remove-comments --remove-optional-tags --remove-redundant-attributes --remove-script-type-attributes --remove-tag-whitespace --use-short-doctype --minify-css true --minify-js true 
+        displayName: 'Minify'
+      - task: CopyFiles@2
+        inputs:
+          sourceFolder: '$(Build.SourcesDirectory)'
+          contents: '**/docs/**'
+          TargetFolder: '$(Build.ArtifactStagingDirectory)/docs'
+      - task: CopyFiles@2
+        inputs:
+          sourceFolder: '$(Build.SourcesDirectory)'
+          contents: '**/output/timings/**'
+          TargetFolder: '$(Build.ArtifactStagingDirectory)/timings'
+      - task: PublishBuildArtifacts@1
+        inputs:
+          pathtoPublish: '$(Build.ArtifactStagingDirectory)/docs/docs/'
+          artifactName: docs
+      - task: PublishBuildArtifacts@1
+        inputs:
+          pathtoPublish: '$(Build.ArtifactStagingDirectory)/timings/output/timings/'
+          artifactName: timings
+      - script: |
+          netlify deploy --prod --dir '$(Build.ArtifactStagingDirectory)/docs/docs/' --site '$(site_id)' --auth '$(auth_token)'
+        displayName: 'Netlify Preview Deploy'
diff --git a/travis_setup.sh b/travis_setup.sh
new file mode 100644
index 000000000..345c5d486
--- /dev/null
+++ b/travis_setup.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+if [ "$TRAVIS_OS_NAME" != "osx" ]; then #
+  cd ..
+  wget "$HDF5_RELEASE_URL/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz"
+  tar -xzf "hdf5-$HDF5_VERSION.tar.gz"
+  cd "hdf5-$HDF5_VERSION"
+  CFLAGS="-w" ./configure --quiet --prefix=/usr/local
+  sudo CFLAGS="-w" make --quiet install
+  cd ../seurat
+fi
+
+# # install python
+# if [[ $TRAVIS_OS_NAME == "linux" ]]; then
+#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
+# elif [[ $TRAVIS_OS_NAME == "osx" ]]; then
+#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh
+# fi
+
+# bash miniconda.sh -b -p $HOME/miniconda
+# export PATH="$HOME/miniconda/bin:$PATH"
+# export RETICULATE_PYTHON="$HOME/miniconda/bin/python"
+# hash -r
+# conda config --set always_yes yes --set changeps1 no
+# conda update -q conda
+# conda info -a
+# pip install --upgrade pip
+# pip install phate

From 7ea77840ecf34bad4f32eb4b740b064239348f9d Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 11 Dec 2024 08:44:14 -0500
Subject: [PATCH 070/166] Drop .travis.yml and travis_setup.sh

---
 .travis.yml     | 55 -------------------------------------------------
 travis_setup.sh | 28 -------------------------
 2 files changed, 83 deletions(-)
 delete mode 100644 .travis.yml
 delete mode 100644 travis_setup.sh

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 38fa4c0e2..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
-
-language: r
-sudo: required
-cache: packages
-
-os:
-  - linux
-  - osx
-
-r:
-  - release
-  - devel
-  
-matrix:
-  exclude:
-    - r: devel
-      os: osx
-
-env:
-  global:
-    - _R_CHECK_FORCE_SUGGESTS_=FALSE
-    - ASAN="-fsanitize=address -fno-omit-frame-pointer"
-    - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
-    - HDF5_VERSION=1.8.17
-    - HDF5_RELEASE_URL="https://support.hdfgroup.org/ftp/HDF5/releases"
-
-before_install:
-  - chmod +x travis_setup.sh
-  - ./travis_setup.sh
-
-addons:
-  apt:
-    packages:
-      - subversion
-      - autoconf
-      - build-essential
-      - libtool
-      - libmagick++-dev
-  homebrew:
-    packages:
-      - libgit2
-
-#bioc_packages:
-#  - GenomeInfoDbData
-#  - DESeq2
-#  - MAST
-#  - S4Vectors
-#  - SummarizedExperiment
-#  - SingleCellExperiment
-
-r_github_packages:
-  - mojaveazure/loomR
-
-warnings_are_errors: false
diff --git a/travis_setup.sh b/travis_setup.sh
deleted file mode 100644
index 345c5d486..000000000
--- a/travis_setup.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-if [ "$TRAVIS_OS_NAME" != "osx" ]; then #
-  cd ..
-  wget "$HDF5_RELEASE_URL/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz"
-  tar -xzf "hdf5-$HDF5_VERSION.tar.gz"
-  cd "hdf5-$HDF5_VERSION"
-  CFLAGS="-w" ./configure --quiet --prefix=/usr/local
-  sudo CFLAGS="-w" make --quiet install
-  cd ../seurat
-fi
-
-# # install python
-# if [[ $TRAVIS_OS_NAME == "linux" ]]; then
-#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
-# elif [[ $TRAVIS_OS_NAME == "osx" ]]; then
-#     wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh
-# fi
-
-# bash miniconda.sh -b -p $HOME/miniconda
-# export PATH="$HOME/miniconda/bin:$PATH"
-# export RETICULATE_PYTHON="$HOME/miniconda/bin/python"
-# hash -r
-# conda config --set always_yes yes --set changeps1 no
-# conda update -q conda
-# conda info -a
-# pip install --upgrade pip
-# pip install phate

From d4b080c3b0e3f5ba0cfad660dc44b16549517a19 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 11 Dec 2024 08:45:03 -0500
Subject: [PATCH 071/166] Drop azure-pipelines.yml

---
 azure-pipelines.yml | 59 ---------------------------------------------
 1 file changed, 59 deletions(-)
 delete mode 100644 azure-pipelines.yml

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 134640395..000000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Inspired by Jim Hester's Azure Pipelines tests
-# https://github.com/jimhester/azuretest
-
-trigger:
-- master
-- develop
-
-pr:
-- master
-- develop
-- release/*
-
-jobs:
-  - job: 'Build_all_vignettes'
-    timeoutInMinutes: 4320
-    pool: Pool2
-    variables:
-      R_LIBS_USER: '$(Agent.BuildDirectory)/R/library'
-    container: satijalab/seurat-pkgdown:develop
-    steps:
-      - script: |
-            set -e
-            echo "R_LIBS =" "$(Rscript -e "cat(Sys.getenv('R_LIBS_USER')[3])")" > .Renviron
-            Rscript --default-packages=stats,graphics,grDevices,utils,datasets,base,methods -e "install.packages('.', type = 'source', repos = NULL, dependencies = FALSE, lib = Sys.getenv('R_LIBS_USER'))"
-            mv /data/* data/
-            mkdir output
-            mkdir output/images
-            mkdir output/timings
-            Rscript -e "pkgdown::init_site()"
-            Rscript -e "pkgdown::build_article('pbmc3k_tutorial')"
-            ls vignettes/*.Rmd | xargs -n1 basename | grep -v 'pbmc3k_tutorial.Rmd' | cut -f 1 -d '.' | parallel -j4 "Rscript -e 'pkgdown::build_article(\"{}\")'"
-            Rscript -e "pkgdown::build_site(lazy = TRUE)"
-            cp vignettes/assets/* docs/articles/assets/
-        displayName: 'Build pkgdown site'
-      - script: |
-            find ./docs/ -name '*.png' -print0 | xargs -0 -P8 -L1 pngquant --ext .png --force --speed 1
-            html-minifier --input-dir ./docs/ --output-dir ./docs/ --file-ext html --collapse-whitespace --remove-comments --remove-optional-tags --remove-redundant-attributes --remove-script-type-attributes --remove-tag-whitespace --use-short-doctype --minify-css true --minify-js true 
-        displayName: 'Minify'
-      - task: CopyFiles@2
-        inputs:
-          sourceFolder: '$(Build.SourcesDirectory)'
-          contents: '**/docs/**'
-          TargetFolder: '$(Build.ArtifactStagingDirectory)/docs'
-      - task: CopyFiles@2
-        inputs:
-          sourceFolder: '$(Build.SourcesDirectory)'
-          contents: '**/output/timings/**'
-          TargetFolder: '$(Build.ArtifactStagingDirectory)/timings'
-      - task: PublishBuildArtifacts@1
-        inputs:
-          pathtoPublish: '$(Build.ArtifactStagingDirectory)/docs/docs/'
-          artifactName: docs
-      - task: PublishBuildArtifacts@1
-        inputs:
-          pathtoPublish: '$(Build.ArtifactStagingDirectory)/timings/output/timings/'
-          artifactName: timings
-      - script: |
-          netlify deploy --prod --dir '$(Build.ArtifactStagingDirectory)/docs/docs/' --site '$(site_id)' --auth '$(auth_token)'
-        displayName: 'Netlify Preview Deploy'

From ea78457aed9f7cf1d0436ee996b8d3ec3ebeb9eb Mon Sep 17 00:00:00 2001
From: roi-meir <meir.roi@gmail.com>
Date: Mon, 30 Sep 2024 10:13:31 +0300
Subject: [PATCH 072/166] Fix SSL problem in bioc_install command

commit a0de5504b48a3705bae98a9b1363863cbde35726
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 14:22:13 2024 +0300

    Remove env step

commit 9884669232fc9ae7d652f339207d6b38db897055
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 14:05:14 2024 +0300

    Move new env under global section

commit ab6cf2d30467d5ddea2f0d7c08700537bc028358
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 14:04:51 2024 +0300

    Remove R_LIBCURL_SSL_REVOKE_BEST_EFFORT from travis config

commit 53a258dd71e3ffc5d2acfb07ae2a18161f2f60a8
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 13:56:48 2024 +0300

    Fix CURL_SSL_BACKEND env

commit 215775cc43497cc1dfc3ef22f9bf2b3f88c63cf8
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 13:52:46 2024 +0300

    Try setting CURL_SSL_BACKEND to openssl

commit 680f83cbb8d3f2443ab956899871212a7c522d7b
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 12:36:02 2024 +0300

    Inspect the current env to check whether R_LIBCURL_SSL_REVOKE_BEST_EFFORT is supplied correctly

commit 13e055b63b532d7bb445f83e7d72c1419184f406
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 11:33:40 2024 +0300

    Remove test travis step

commit 92d5f5f95868db6f7444a908cbbfbdaeb238704a
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 11:20:03 2024 +0300

    Check that appveyor is using the branch configuration

commit e3c193f69fd4d53e279f2b8fdb1cf674b6b5398f
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 11:13:52 2024 +0300

    Set R_LIBCURL_SSL_REVOKE_BEST_EFFORT in travis.yml

commit 710bab48c3f2eecabab5f54f32743cc2681138d3
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 11:08:24 2024 +0300

    Move R_LIBCURL_SSL_REVOKE_BEST_EFFORT environment variable outside of global section

commit b1d8d51aca0d00f15b40f1e03a6f133a0f19ddfb
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 10:35:42 2024 +0300

    Set R_LIBCURL_SSL_REVOKE_BEST_EFFORT to TRUE in string

commit 2701bc2be12d371e33689f141dc96631333faf8f
Author: roi-meir <meir.roi@gmail.com>
Date:   Mon Sep 30 10:13:31 2024 +0300

    Set R_LIBCURL_SSL_REVOKE_BEST_EFFORT to TRUE in appveyor script

    Deal with SSL problem in bioc_install command
---
 appveyor.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/appveyor.yml b/appveyor.yml
index 9c400d215..7a987f45a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -22,6 +22,8 @@ environment:
     PYTHON: "C:\\Python36-x64"
     RETICULATE_PYTHON: "C:\\Python36-x64"
     CRAN: "https://cloud.r-project.org"
+    R_LIBCURL_SSL_REVOKE_BEST_EFFORT: "TRUE"
+    CURL_SSL_BACKEND: "openssl"
 
 build_script:
   - travis-tool.sh install_deps

From 78f43e19aa5723369cbcbb4437e0c7a69198619f Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 11 Dec 2024 09:57:46 -0500
Subject: [PATCH 073/166] Add appveyor.yml back into .Rbuildignore

---
 .Rbuildignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.Rbuildignore b/.Rbuildignore
index e2d47f235..d203ce14f 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -4,6 +4,7 @@
 ^\.Rproj\.user$
 ^.*\.old$
 .git
+appveyor.yml
 cran-comments.md
 CODE_OF_CONDUCT.md
 ^_pkgdown\.yaml$

From 2b5a1c674bf3d948f5d919147f5de6046678ebfd Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 10 Dec 2024 14:15:59 -0500
Subject: [PATCH 074/166] Include `features` param in SketchData docstring

---
 R/sketching.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/sketching.R b/R/sketching.R
index c2e8535c5..a15654a51 100644
--- a/R/sketching.R
+++ b/R/sketching.R
@@ -31,6 +31,8 @@ NULL
 #' @param seed A positive integer for the seed of the random number generator. Default is 123.
 #' @param cast The type to cast the resulting assay to. Default is 'dgCMatrix'.
 #' @param verbose Print progress and diagnostic messages
+#' @param features A character vector of feature names to include in the
+#' sketched assay.
 #' @param ... Arguments passed to other methods
 #'
 #' @return A Seurat object with the sketched data added as a new assay.

From 04f4485de6fd7951dd0688b6a20e72aed914c045 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 10 Dec 2024 14:19:48 -0500
Subject: [PATCH 075/166] Update docs

---
 man/SketchData.Rd | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/man/SketchData.Rd b/man/SketchData.Rd
index 97ffc2d78..bc4998117 100644
--- a/man/SketchData.Rd
+++ b/man/SketchData.Rd
@@ -43,6 +43,9 @@ Default is 'LeverageScore'.}
 
 \item{verbose}{Print progress and diagnostic messages}
 
+\item{features}{A character vector of feature names to include in the
+sketched assay.}
+
 \item{...}{Arguments passed to other methods}
 }
 \value{

From f0db435ec240d1f96d3d8089053a7071d9b8976c Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 13 Dec 2024 07:23:09 -0500
Subject: [PATCH 076/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e18541d10..0529272e6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9007
+Version: 5.1.0.9008
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 611bc60f76dc6a6cc074200f30c79872e2dcd33c Mon Sep 17 00:00:00 2001
From: Diego Diez <diego10ruiz@gmail.com>
Date: Sun, 17 Dec 2023 00:02:06 +0900
Subject: [PATCH 077/166] Fix access to layer data in new Seurat V5.

---
 R/objects.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/objects.R b/R/objects.R
index 7553a5d84..b01df7344 100644
--- a/R/objects.R
+++ b/R/objects.R
@@ -601,13 +601,13 @@ DietSeurat <- function(
       }
       for (lyr in layers.rm) {
         suppressWarnings(object <- tryCatch(expr = {
-          object[[assay]][[lyr]] <- NULL
+          object[[assay]][lyr] <- NULL
           object
         }, error = function(e) {
           if (lyr == "data"){
-            object[[assay]][[lyr]] <- sparseMatrix(i = 1, j = 1, x = 1,
-                         dims = dim(object[[assay]][[lyr]]),
-                         dimnames = dimnames(object[[assay]][[lyr]]))
+            object[[assay]][lyr] <- sparseMatrix(i = 1, j = 1, x = 1,
+                         dims = dim(object[[assay]][lyr]),
+                         dimnames = dimnames(object[[assay]][lyr]))
           } else{
             slot(object = object[[assay]], name = lyr) <- new(Class = "dgCMatrix")
           }

From 28ad870817cbeb619d28b75520d48945dce5cd8e Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 16 Dec 2024 20:26:52 -0500
Subject: [PATCH 078/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0529272e6..065c12c65 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9008
+Version: 5.1.0.9009
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 2275144e7b98eb2afe030633f082b446011b360b Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 10 Dec 2024 14:00:54 -0500
Subject: [PATCH 079/166] Drop bad reference to `npcs` in RunSLSI.StdAssay

---
 R/dimensional_reduction.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 4450054fe..5fd882b15 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2770,7 +2770,6 @@ RunSLSI.StdAssay <- function(
   reduction.data <- RunSLSI(
     object = data.use,
     assay = assay,
-    npcs = npcs,
     reduction.key = reduction.key,
     graph = graph,
     verbose = verbose,

From 8941be92b6c3bc44e64ae78bd63ae93621b208eb Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 17 Dec 2024 07:42:26 -0500
Subject: [PATCH 080/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 065c12c65..93613327d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9009
+Version: 5.1.0.9010
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From aa296c2735649e5f38440b071eb05262ebc14b11 Mon Sep 17 00:00:00 2001
From: fspecque <f.specque@hotmail.fr>
Date: Wed, 10 Jan 2024 11:58:57 +0100
Subject: [PATCH 081/166] Fix umap-learn version check

---
 R/dimensional_reduction.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 5fd882b15..ecdeac184 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -1370,7 +1370,7 @@ RunUMAP.default <- function(
       umap_import <- import(module = "umap", delay_load = TRUE)
       sklearn <- import("sklearn", delay_load = TRUE)
       if (densmap &&
-          numeric_version(x = umap_import$pkg_resources$get_distribution("umap-learn")$version) <
+          numeric_version(x = umap_import$`__version__`) <
           numeric_version(x = "0.5.0")) {
         stop("densmap is only supported by versions >= 0.5.0 of umap-learn. Upgrade umap-learn (e.g. pip install --upgrade umap-learn).")
       }
@@ -1394,7 +1394,7 @@ RunUMAP.default <- function(
         angular_rp_forest = angular.rp.forest,
         verbose = verbose
       )
-      if (numeric_version(x = umap_import$pkg_resources$get_distribution("umap-learn")$version) >=
+      if (numeric_version(x = umap_import$`__version__`) >=
           numeric_version(x = "0.5.0")) {
         umap.args <- c(umap.args, list(
           densmap = densmap,
@@ -1613,7 +1613,7 @@ RunUMAP.Graph <- function(
     metric_kwds = metric.kwds,
     verbose = verbose
   )
-  if (numeric_version(x = umap$pkg_resources$get_distribution("umap-learn")$version) >=
+  if (numeric_version(x = umap$`__version__`) >=
       numeric_version(x = "0.5.0")) {
     umap.args <- c(umap.args, list(
       densmap = densmap,

From 216b673a6c57c6c3870cc44064cf8582c02992da Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 18 Dec 2024 08:34:15 -0500
Subject: [PATCH 082/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 93613327d..30e699d4a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9010
+Version: 5.1.0.9011
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 34bae9d96defb1ea9a3655c5bc10d5f7c02bdd4b Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Wed, 18 Dec 2024 11:24:56 -0500
Subject: [PATCH 083/166] Update changelog

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 2ad02f413..cc7169df2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
 - Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`
 - Added ability to load Xenium nucleus segmentation masks
 - Updated `LoadXenium` to also read some run metadata (run start time, preservation method, panel used, organism, tissue type, instrument software version and stain kit used) into `misc` slot
@@ -238,6 +239,7 @@
 - Add `rlsi` option for `FindIntegrationAnchors()`
 
 ## Changes
+- 
 - Preserve feature metadata when converting from `SingleCellExperiment` to `SeuratObject` class
 ([#4205](https://github.com/satijalab/seurat/issues/4205))
 - Preserve multiple assays when converting from `SingleCellExperiment` to `SeuratObject` class

From 676cd6b0c5c2e52f012c1d6e3a2cda58402c8f5f Mon Sep 17 00:00:00 2001
From: Ben Parks <bnprks+git@gmail.com>
Date: Sat, 6 Jan 2024 13:54:17 -0800
Subject: [PATCH 084/166] Fix JackStraw with BPCells objects

---
 R/dimensional_reduction.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index ecdeac184..4f1d0f84f 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -2335,7 +2335,7 @@ JackRandom <- function(
     rand.genes <- sample(x = rownames(x = scaled.data), size = 3)
   }
   data.mod <- scaled.data
-  data.mod[rand.genes, ] <- MatrixRowShuffle(x = scaled.data[rand.genes, ])
+  data.mod[rand.genes, ] <- MatrixRowShuffle(x = as.matrix(scaled.data[rand.genes, ]))
   temp.object <- RunPCA(
     object = data.mod,
     assay = "temp",

From 626e78ae3a712b858ac0febffe4700f7e3e3c037 Mon Sep 17 00:00:00 2001
From: Ben Parks <bnprks+git@gmail.com>
Date: Sat, 6 Jan 2024 13:54:52 -0800
Subject: [PATCH 085/166] Improve PCA performance with BPCells

---
 R/dimensional_reduction.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index 4f1d0f84f..dd1b9f4bb 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -865,19 +865,22 @@ RunPCA.default <- function(
   }
  if (inherits(x = object, what = 'matrix')) {
    RowVar.function <- RowVar
+   svd.function <- irlba
  } else if (inherits(x = object, what = 'dgCMatrix')) {
    RowVar.function <- RowVarSparse
+   svd.function <- irlba
  } else if (inherits(x = object, what = 'IterableMatrix')) {
    RowVar.function <- function(x) {
      return(BPCells::matrix_stats(
        matrix = x,
        row_stats = 'variance'
      )$row_stats['variance',])
-     }
+    }
+    svd.function <- function(A, nv, ...) BPCells::svds(A=A, k = nv)
  }
   if (rev.pca) {
     npcs <- min(npcs, ncol(x = object) - 1)
-    pca.results <- irlba(A = object, nv = npcs, ...)
+    pca.results <- svd.function(A = object, nv = npcs, ...)
     total.variance <- sum(RowVar.function(x = t(x = object)))
     sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1))
     if (weight.by.var) {
@@ -891,7 +894,7 @@ RunPCA.default <- function(
     total.variance <- sum(RowVar.function(x = object))
     if (approx) {
       npcs <- min(npcs, nrow(x = object) - 1)
-      pca.results <- irlba(A = t(x = object), nv = npcs, ...)
+      pca.results <- svd.function(A = t(x = object), nv = npcs, ...)
       feature.loadings <- pca.results$v
       sdev <- pca.results$d/sqrt(max(1, ncol(object) - 1))
       if (weight.by.var) {

From 91c8dd7e6807f14786e128f57c9ed437bb56d226 Mon Sep 17 00:00:00 2001
From: Ben Parks <bnprks+git@gmail.com>
Date: Wed, 18 Dec 2024 16:16:51 -0800
Subject: [PATCH 086/166] Update changelog and version for pull #8271

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 30e699d4a..93fbc3e78 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9011
+Version: 5.1.0.9012
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(
diff --git a/NEWS.md b/NEWS.md
index cc7169df2..b07f06694 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,8 @@
 - Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`
 - Updated `SketchData`'s `ncells` parameter to accept integer vector
+- Updated `JackStraw` to support `BPCells` matrices
+- Updated `RunPCA` to use the `BPCells`-provided SVD solver on `BPCells` matrices
 
 # Seurat 5.1.0 (2024-05-08)
 

From 708973f132c0c76ed1c2a09cbce5cc367f1a4330 Mon Sep 17 00:00:00 2001
From: Alan O'Callaghan <alan.ocallaghan@outlook.com>
Date: Fri, 16 Dec 2022 20:00:18 +0000
Subject: [PATCH 087/166] Switch from leiden to leidenbase

---
 R/clustering.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index c20719b92..6ffe36550 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1706,17 +1706,17 @@ RunLeiden <- function(
     },
     stop("Method for Leiden must be either 'matrix' or igraph'")
   )
-  #run leiden from CRAN package (calls python with reticulate)
-  partition <- leiden(
+  # run clustering with leidenbase
+  partition <- leidenbase::leiden_find_partition(
     object = input,
     partition_type = partition.type,
     initial_membership = initial.membership,
-    weights = NULL,
+    edge_weights = NULL,
     node_sizes = node.sizes,
     resolution_parameter = resolution.parameter,
     seed = random.seed,
-    n_iterations = n.iter
-  )
+    num_iter = n.iter
+  )$membership
   return(partition)
 }
 

From 0d9c2a37c24aa4b24445b97881da86d3ad6d7cbc Mon Sep 17 00:00:00 2001
From: Alan O'Callaghan <alan.ocallaghan@outlook.com>
Date: Fri, 16 Dec 2022 20:10:03 +0000
Subject: [PATCH 088/166] Add leidenbase to DESCRIPTION

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 93fbc3e78..7bdbe2abe 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -56,7 +56,7 @@ Imports:
     irlba,
     jsonlite,
     KernSmooth,
-    leiden (>= 0.3.1),
+    leidenbase,
     lifecycle,
     lmtest,
     MASS,

From e07f4ae91b864dd789e9afa0cbe339a4acf2cb8a Mon Sep 17 00:00:00 2001
From: Alan O'Callaghan <alan.ocallaghan@outlook.com>
Date: Tue, 27 Dec 2022 13:08:23 +0000
Subject: [PATCH 089/166] Remove import

---
 R/clustering.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/clustering.R b/R/clustering.R
index 6ffe36550..152734548 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1650,7 +1650,6 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) {
 #
 # @keywords graph network igraph mvtnorm simulation
 #
-#' @importFrom leiden leiden
 #' @importFrom reticulate py_module_available
 #' @importFrom igraph graph_from_adjacency_matrix graph_from_adj_list
 #

From 73d2e9f2f0b5632c3c1968a28731ecf772c24451 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:10:35 -0500
Subject: [PATCH 090/166] Fixup leidenbase::leiden_find_partition call

---
 R/clustering.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/clustering.R b/R/clustering.R
index 152734548..7e298e093 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1707,7 +1707,7 @@ RunLeiden <- function(
   )
   # run clustering with leidenbase
   partition <- leidenbase::leiden_find_partition(
-    object = input,
+    input,
     partition_type = partition.type,
     initial_membership = initial.membership,
     edge_weights = NULL,

From 4ed1b1992005d8eb4f1763ed7b8fdb69622d095c Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:11:24 -0500
Subject: [PATCH 091/166] Drop py_module_available check for leidenalg

---
 R/clustering.R | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index 7e298e093..e68c62b72 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1675,12 +1675,6 @@ RunLeiden <- function(
   random.seed = 0,
   n.iter = 10
 ) {
-  if (!py_module_available(module = 'leidenalg')) {
-    stop(
-      "Cannot find Leiden algorithm, please install through pip (e.g. pip install leidenalg).",
-      call. = FALSE
-    )
-  }
   switch(
     EXPR = method,
     "matrix" = {

From 8298ead6d1bb1c73b42e610c05a55ce1ea2373c3 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:19:32 -0500
Subject: [PATCH 092/166] Deprecate the method parameter for RunLeiden

---
 R/clustering.R | 55 +++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index e68c62b72..d13207fb0 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1659,7 +1659,7 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) {
 #
 RunLeiden <- function(
   object,
-  method = c("matrix", "igraph"),
+  method = deprecated(),
   partition.type = c(
     'RBConfigurationVertexPartition',
     'ModularityVertexPartition',
@@ -1675,30 +1675,35 @@ RunLeiden <- function(
   random.seed = 0,
   n.iter = 10
 ) {
-  switch(
-    EXPR = method,
-    "matrix" = {
-      input <- as(object = object, Class = "matrix")
-    },
-    "igraph" = {
-      input <- if (inherits(x = object, what = 'list')) {
-        graph_from_adj_list(adjlist = object)
-      } else if (inherits(x = object, what = c('dgCMatrix', 'matrix', 'Matrix'))) {
-        if (inherits(x = object, what = 'Graph')) {
-          object <- as.sparse(x = object)
-        }
-        graph_from_adjacency_matrix(adjmatrix = object, weighted = TRUE)
-      } else if (inherits(x = object, what = 'igraph')) {
-        object
-      } else {
-        stop(
-          "Method for Leiden not found for class", class(x = object),
-          call. = FALSE
-        )
-      }
-    },
-    stop("Method for Leiden must be either 'matrix' or igraph'")
-  )
+  # The `method` parameter was deprecated after switching from the `leiden`
+  # package to `leidenbase` to run the algorithm. Unlike `leiden`, `leidenbase`
+  # _requires_ an `igraph` input, so the parameter no longer makes sense. The
+  # good news is that `leidenbase` is much faster than `leiden` so it shouldn't
+  # really matter. 
+  if (is_present(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "RunLeiden(method)"
+    )
+  }
+  
+  # Convert `object` into an `igraph`.
+  input <- if (inherits(x = object, what = 'list')) {
+    graph_from_adj_list(adjlist = object)
+  } else if (inherits(x = object, what = c('dgCMatrix', 'matrix', 'Matrix'))) {
+    if (inherits(x = object, what = 'Graph')) {
+      object <- as.sparse(x = object)
+    }
+    graph_from_adjacency_matrix(adjmatrix = object, weighted = TRUE)
+  } else if (inherits(x = object, what = 'igraph')) {
+    object
+  } else {
+    stop(
+      "Method for Leiden not found for class", class(x = object),
+      call. = FALSE
+    )
+  }
+
   # run clustering with leidenbase
   partition <- leidenbase::leiden_find_partition(
     input,

From 80c9f59271a5e55b9f99bb9c176fdc1f374d777f Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:27:45 -0500
Subject: [PATCH 093/166] Avoid random.seed <= 0 in RunLeiden

---
 R/clustering.R | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/R/clustering.R b/R/clustering.R
index d13207fb0..53395d922 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1672,9 +1672,22 @@ RunLeiden <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution.parameter = 1,
-  random.seed = 0,
+  random.seed = 1,
   n.iter = 10
 ) {
+  # `leidenbase::leiden_find_partition` requires its `seed` parameter to be
+  # greater than 0 (or NULL) but the default value for `FindClusters` is 0. 
+  # If `random.seed` is 0 or less, throw a warning and reset the value to 1. 
+   if (!is.null(random.seed) && random.seed <= 0) {
+    warning(
+      paste0(
+        "`random.seed` must be greater than 0 for leiden clustering, ",
+        "resetting `random.seed` to 1."
+      )
+    )
+    random.seed <- 1
+  }
+
   # The `method` parameter was deprecated after switching from the `leiden`
   # package to `leidenbase` to run the algorithm. Unlike `leiden`, `leidenbase`
   # _requires_ an `igraph` input, so the parameter no longer makes sense. The

From 8cec0340a85d30cace85fcf1523c3afee6a38c7e Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:35:28 -0500
Subject: [PATCH 094/166] Tidy RunLeiden

---
 R/clustering.R | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index 53395d922..f4e4b5db4 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1701,23 +1701,30 @@ RunLeiden <- function(
   }
   
   # Convert `object` into an `igraph`.
-  input <- if (inherits(x = object, what = 'list')) {
-    graph_from_adj_list(adjlist = object)
-  } else if (inherits(x = object, what = c('dgCMatrix', 'matrix', 'Matrix'))) {
-    if (inherits(x = object, what = 'Graph')) {
-      object <- as.sparse(x = object)
+  # If `object` is already an `igraph` no conversion is necessary.
+  if (inherits(object, what = "igraph")) { 
+    input <- object
+  # Otherwise, if `object` is a list, assume it is an adjacency list...
+  } else if (inherits(object, what = "list")) {
+    # And convert it to an `igraph` with the appropriate method. 
+    input <- graph_from_adj_list(object)
+  # Or, if `object` is a matrix...
+  } else if (inherits(object, what = c("dgCMatrix", "matrix", "Matrix"))) {
+    # Make sure the matrix is sparse.
+    if (inherits(object, what = "Graph")) {
+      object <- as.sparse(object)
     }
-    graph_from_adjacency_matrix(adjmatrix = object, weighted = TRUE)
-  } else if (inherits(x = object, what = 'igraph')) {
-    object
+    # And then convert it to an `igraph`.
+    input <- graph_from_adjacency_matrix(object, weighted = TRUE)
+  # Throw an error if `object` is of an unknown type. 
   } else {
     stop(
-      "Method for Leiden not found for class", class(x = object),
+      "Method for Leiden not found for class", class(object),
       call. = FALSE
     )
   }
 
-  # run clustering with leidenbase
+  # Run clustering with `leidenbase`.
   partition <- leidenbase::leiden_find_partition(
     input,
     partition_type = partition.type,

From f3e31a6e523b7d906858d133d4dfb881c5064f35 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:53:57 -0500
Subject: [PATCH 095/166] Update docstring for RunLeiden

---
 R/clustering.R | 52 ++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index f4e4b5db4..bdd10c582 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -1629,34 +1629,32 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) {
   return(n.ob)
 }
 
-# Run Leiden clustering algorithm
-#
-# Implements the Leiden clustering algorithm in R using reticulate
-# to run the Python version. Requires the python "leidenalg" and "igraph" modules
-# to be installed. Returns a vector of partition indices.
-#
-# @param adj_mat An adjacency matrix or SNN matrix
-# @param partition.type Type of partition to use for Leiden algorithm.
-# Defaults to RBConfigurationVertexPartition. Options include: ModularityVertexPartition,
-# RBERVertexPartition, CPMVertexPartition, MutableVertexPartition,
-# SignificanceVertexPartition, SurpriseVertexPartition (see the Leiden python
-# module documentation for more details)
-# @param initial.membership,node.sizes Parameters to pass to the Python leidenalg function.
-# @param resolution.parameter A parameter controlling the coarseness of the clusters
-# for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for
-# partition types that accept a resolution parameter)
-# @param random.seed Seed of the random number generator
-# @param n.iter Maximal number of iterations per random start
-#
-# @keywords graph network igraph mvtnorm simulation
-#
-#' @importFrom reticulate py_module_available
+#' Run Leiden clustering algorithm
+#'
+#' Returns a vector of partition indices.
+#'
+#' @param object An adjacency matrix or adjacency list. 
+#' @param method DEPRECATED.
+#' @param partition.type Type of partition to use for Leiden algorithm.
+#' Defaults to "RBConfigurationVertexPartition", see 
+#' https://cran.rstudio.com/web/packages/leidenbase/leidenbase.pdf for more options.
+#' @param initial.membership Passed to the `initial_membership` parameter
+#' of `leidenbase::leiden_find_partition`.
+#' @param node.sizes Passed to the `node_sizes` parameter of
+#' `leidenbase::leiden_find_partition`.
+#' @param resolution.parameter A parameter controlling the coarseness of the clusters
+#' for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for
+#' partition types that accept a resolution parameter)
+#' @param random.seed Seed of the random number generator, must be greater than 0.
+#' @param n.iter Maximal number of iterations per random start
+#'
 #' @importFrom igraph graph_from_adjacency_matrix graph_from_adj_list
-#
-# @author Tom Kelly
-#
-# @export
-#
+#'
+#' @export
+#' 
+#' @rdname RunLeiden
+#' @concept clustering
+#' 
 RunLeiden <- function(
   object,
   method = deprecated(),

From a08b238427555e2207ede60b2b244fd7615f8c70 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 08:56:06 -0500
Subject: [PATCH 096/166] Deprecate method parameter for FindClusters

---
 R/clustering.R | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index bdd10c582..7060b7e23 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -275,14 +275,16 @@ PredictAssay <- function(
 #' @importFrom future nbrOfWorkers
 #'
 #' @param modularity.fxn Modularity function (1 = standard; 2 = alternative).
-#' @param initial.membership,node.sizes Parameters to pass to the Python leidenalg function.
+#' @param initial.membership Passed to the `initial_membership` parameter
+#' of `leidenbase::leiden_find_partition`.
+#' @param node.sizes Passed to the `node_sizes` parameter of
+#' `leidenbase::leiden_find_partition`.
 #' @param resolution Value of the resolution parameter, use a value above
 #' (below) 1.0 if you want to obtain a larger (smaller) number of communities.
 #' @param algorithm Algorithm for modularity optimization (1 = original Louvain
 #' algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM
-#' algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python.
-#' @param method Method for running leiden (defaults to matrix which is fast for small datasets).
-#' Enable method = "igraph" to avoid casting large data to a dense matrix.
+#' algorithm; 4 = Leiden algorithm).
+#' @param method DEPRECATED.
 #' @param n.start Number of random starts.
 #' @param n.iter Maximal number of iterations per random start.
 #' @param random.seed Seed of the random number generator.
@@ -303,7 +305,7 @@ FindClusters.default <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  method = deprecated(),
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -315,6 +317,14 @@ FindClusters.default <- function(
   ...
 ) {
   CheckDots(...)
+  # The `method` parameter is for `RunLeiden` but was deprecated, see
+  # function for more details.
+  if (is_present(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "FindClusters(method)"
+    )
+  }
   if (is.null(x = object)) {
     stop("Please provide an SNN graph")
   }
@@ -344,7 +354,6 @@ FindClusters.default <- function(
         } else if (algorithm == 4) {
           ids <- RunLeiden(
             object = object,
-            method = method,
             partition.type = "RBConfigurationVertexPartition",
             initial.membership = initial.membership,
             node.sizes = node.sizes,
@@ -418,7 +427,7 @@ FindClusters.Seurat <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  method = deprecated(),
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -430,6 +439,15 @@ FindClusters.Seurat <- function(
   ...
 ) {
   CheckDots(...)
+  # Since we're throwing a soft deprecation warning, it needs to be duplicated
+  # for each implementation of the `FindClusters` generic, see
+  # `FindCluster.default` for more details.
+  if (is_present(method)) {
+    deprecate_soft(
+      when = "5.2.0",
+      what = "FindClusters(method)"
+    )
+  }
   graph.name <- graph.name %||% paste0(DefaultAssay(object = object), "_snn")
   if (!graph.name %in% names(x = object)) {
     stop("Provided graph.name not present in Seurat object")
@@ -443,7 +461,6 @@ FindClusters.Seurat <- function(
     initial.membership = initial.membership,
     node.sizes = node.sizes,
     resolution = resolution,
-    method = method,
     algorithm = algorithm,
     n.start = n.start,
     n.iter = n.iter,

From e9452c52eda3cdb20510617a93ccf49db1274879 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 10:05:34 -0500
Subject: [PATCH 097/166] Update default method value for FindCluster.Seurat

LogSeuratCommand doesn't like deprecated(), use NULL instead
---
 R/clustering.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index 7060b7e23..902034510 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -427,7 +427,8 @@ FindClusters.Seurat <- function(
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = deprecated(),
+  # ToDo: Update `LogSeuratCommand` to accommodate deprecated parameters.
+  method = NULL,
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -442,7 +443,7 @@ FindClusters.Seurat <- function(
   # Since we're throwing a soft deprecation warning, it needs to be duplicated
   # for each implementation of the `FindClusters` generic, see
   # `FindCluster.default` for more details.
-  if (is_present(method)) {
+  if (!is.null(method)) {
     deprecate_soft(
       when = "5.2.0",
       what = "FindClusters(method)"

From deb8e36a57c06373a9f4abf59d520d2d0327566f Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 10:14:42 -0500
Subject: [PATCH 098/166] Add smoke test for FindClusters

---
 tests/testthat/test_find_clusters.R | 40 +++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 tests/testthat/test_find_clusters.R

diff --git a/tests/testthat/test_find_clusters.R b/tests/testthat/test_find_clusters.R
new file mode 100644
index 000000000..e7d08b3b5
--- /dev/null
+++ b/tests/testthat/test_find_clusters.R
@@ -0,0 +1,40 @@
+path_to_counts <- system.file("extdata", "pbmc_raw.txt", package = "Seurat")
+
+
+# Builds a `Seurat` instance and annotates it with the requisite data
+# structures for running `FindClusters` (i.e. a shared-nearest-neighbor
+# (SNN) graph).
+get_test_data <- function() {
+  raw_counts <- read.table(path_to_counts, sep = "\t", row.names = 1)
+  counts <- as.sparse(as.matrix(raw_counts))
+  assay <- CreateAssay5Object(counts)
+  test_data <- CreateSeuratObject(assay)
+
+  test_data <- NormalizeData(test_data, verbose = FALSE)
+  test_data <- FindVariableFeatures(test_data, verbose = FALSE)
+  test_data <- ScaleData(test_data, verbose = FALSE)
+  # Reduce number of PCs to avoid warning from `irlba` caused by the
+  # small size of the dataset being used.
+  test_data <- RunPCA(test_data, npcs = 20, verbose = FALSE)
+  test_data <- FindNeighbors(test_data, dims = 1:20, verbose = FALSE)
+
+  return(test_data)
+}
+
+
+context("FindClusters")
+
+
+test_that("Smoke test for `FindClusters`", {
+  test_case <- get_test_data()
+
+  # Check that every clustering algorithm can be run without errors.
+  expect_no_error(FindClusters(test_case))
+  expect_no_error(FindClusters(test_case, algorithm = 1))
+  expect_no_error(FindClusters(test_case, algorithm = 2))
+  expect_no_error(FindClusters(test_case, algorithm = 3))
+  # The leiden algorithm requires `random.seed` to be greater than 0, but the
+  # default for `FindClusters` is 0, so a warning should be raised.
+  expect_warning(FindClusters(test_case, algorithm = 4))
+  expect_no_warning(FindClusters(test_case, algorithm = 4, random.seed = 1))
+})

From 375c1881a90b7769d59dd7d75609d793a0a2f466 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 11:06:19 -0500
Subject: [PATCH 099/166] Expand FindClusters smoke test with spot checks

---
 tests/testthat/test_find_clusters.R | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test_find_clusters.R b/tests/testthat/test_find_clusters.R
index e7d08b3b5..0da9eb587 100644
--- a/tests/testthat/test_find_clusters.R
+++ b/tests/testthat/test_find_clusters.R
@@ -14,9 +14,15 @@ get_test_data <- function() {
   test_data <- FindVariableFeatures(test_data, verbose = FALSE)
   test_data <- ScaleData(test_data, verbose = FALSE)
   # Reduce number of PCs to avoid warning from `irlba` caused by the
-  # small size of the dataset being used.
-  test_data <- RunPCA(test_data, npcs = 20, verbose = FALSE)
-  test_data <- FindNeighbors(test_data, dims = 1:20, verbose = FALSE)
+  # small size of the dataset being used. Plus, we only want to build our
+  # SNN graph using the first 10 PCs to get "interesting" clustering results.
+  test_data <- RunPCA(test_data, npcs = 10, verbose = FALSE)
+  test_data <- FindNeighbors(
+    test_data,
+    k.param=10,
+    dims = 1:10,
+    verbose = FALSE
+  )
 
   return(test_data)
 }
@@ -28,8 +34,15 @@ context("FindClusters")
 test_that("Smoke test for `FindClusters`", {
   test_case <- get_test_data()
 
+  # Spot check cluster assignments using the default parameters.
+  results <- FindClusters(test_case)$seurat_clusters
+  expect_equal(results[[1]], factor(3, levels=0:5))
+  expect_equal(results[[15]], factor(4, levels=0:5))
+  expect_equal(results[[24]], factor(0, levels=0:5))
+  expect_equal(results[[72]], factor(5, levels=0:5))
+  expect_equal(results[[length(results)]], factor(2, levels=0:5))
+
   # Check that every clustering algorithm can be run without errors.
-  expect_no_error(FindClusters(test_case))
   expect_no_error(FindClusters(test_case, algorithm = 1))
   expect_no_error(FindClusters(test_case, algorithm = 2))
   expect_no_error(FindClusters(test_case, algorithm = 3))

From e51654490ecc72f5245f3a51c0f24149bccf7f68 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 09:28:44 -0500
Subject: [PATCH 100/166] Update docs

---
 NAMESPACE             |  2 +-
 man/FindClusters.Rd   | 15 ++++++++------
 man/FindSubCluster.Rd |  2 +-
 man/RunLeiden.Rd      | 46 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 8 deletions(-)
 create mode 100644 man/RunLeiden.Rd

diff --git a/NAMESPACE b/NAMESPACE
index dea8dcb78..910b3f493 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -385,6 +385,7 @@ export(RunCCA)
 export(RunGraphLaplacian)
 export(RunICA)
 export(RunLDA)
+export(RunLeiden)
 export(RunMarkVario)
 export(RunMixscape)
 export(RunMoransI)
@@ -754,7 +755,6 @@ importFrom(igraph,plot.igraph)
 importFrom(irlba,irlba)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,read_json)
-importFrom(leiden,leiden)
 importFrom(lifecycle,deprecate_soft)
 importFrom(lifecycle,deprecate_stop)
 importFrom(lifecycle,deprecate_warn)
diff --git a/man/FindClusters.Rd b/man/FindClusters.Rd
index 513cf8f36..2cdd35a70 100644
--- a/man/FindClusters.Rd
+++ b/man/FindClusters.Rd
@@ -14,7 +14,7 @@ FindClusters(object, ...)
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  method = deprecated(),
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -34,7 +34,7 @@ FindClusters(object, ...)
   initial.membership = NULL,
   node.sizes = NULL,
   resolution = 0.8,
-  method = "matrix",
+  method = NULL,
   algorithm = 1,
   n.start = 10,
   n.iter = 10,
@@ -53,17 +53,20 @@ FindClusters(object, ...)
 
 \item{modularity.fxn}{Modularity function (1 = standard; 2 = alternative).}
 
-\item{initial.membership, node.sizes}{Parameters to pass to the Python leidenalg function.}
+\item{initial.membership}{Passed to the `initial_membership` parameter
+of `leidenbase::leiden_find_partition`.}
+
+\item{node.sizes}{Passed to the `node_sizes` parameter of
+`leidenbase::leiden_find_partition`.}
 
 \item{resolution}{Value of the resolution parameter, use a value above
 (below) 1.0 if you want to obtain a larger (smaller) number of communities.}
 
-\item{method}{Method for running leiden (defaults to matrix which is fast for small datasets).
-Enable method = "igraph" to avoid casting large data to a dense matrix.}
+\item{method}{DEPRECATED.}
 
 \item{algorithm}{Algorithm for modularity optimization (1 = original Louvain
 algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM
-algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python.}
+algorithm; 4 = Leiden algorithm).}
 
 \item{n.start}{Number of random starts.}
 
diff --git a/man/FindSubCluster.Rd b/man/FindSubCluster.Rd
index cff7b8823..1c48c240c 100644
--- a/man/FindSubCluster.Rd
+++ b/man/FindSubCluster.Rd
@@ -27,7 +27,7 @@ FindSubCluster(
 
 \item{algorithm}{Algorithm for modularity optimization (1 = original Louvain
 algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM
-algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python.}
+algorithm; 4 = Leiden algorithm).}
 }
 \value{
 return a object with sub cluster labels in the sub-cluster.name variable
diff --git a/man/RunLeiden.Rd b/man/RunLeiden.Rd
new file mode 100644
index 000000000..5514a751c
--- /dev/null
+++ b/man/RunLeiden.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{RunLeiden}
+\alias{RunLeiden}
+\title{Run Leiden clustering algorithm}
+\usage{
+RunLeiden(
+  object,
+  method = deprecated(),
+  partition.type = c("RBConfigurationVertexPartition", "ModularityVertexPartition",
+    "RBERVertexPartition", "CPMVertexPartition", "MutableVertexPartition",
+    "SignificanceVertexPartition", "SurpriseVertexPartition"),
+  initial.membership = NULL,
+  node.sizes = NULL,
+  resolution.parameter = 1,
+  random.seed = 1,
+  n.iter = 10
+)
+}
+\arguments{
+\item{object}{An adjacency matrix or adjacency list.}
+
+\item{method}{DEPRECATED.}
+
+\item{partition.type}{Type of partition to use for Leiden algorithm.
+Defaults to "RBConfigurationVertexPartition", see 
+https://cran.rstudio.com/web/packages/leidenbase/leidenbase.pdf for more options.}
+
+\item{initial.membership}{Passed to the `initial_membership` parameter
+of `leidenbase::leiden_find_partition`.}
+
+\item{node.sizes}{Passed to the `node_sizes` parameter of
+`leidenbase::leiden_find_partition`.}
+
+\item{resolution.parameter}{A parameter controlling the coarseness of the clusters
+for Leiden algorithm. Higher values lead to more clusters. (defaults to 1.0 for
+partition types that accept a resolution parameter)}
+
+\item{random.seed}{Seed of the random number generator, must be greater than 0.}
+
+\item{n.iter}{Maximal number of iterations per random start}
+}
+\description{
+Returns a vector of partition indices.
+}
+\concept{clustering}

From 6c0184eac45d925e6e69ec709f044450668ba2dd Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 11:26:10 -0500
Subject: [PATCH 101/166] Update changelog

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index b07f06694..55c05cf81 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))
 - Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
 - Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`
 - Added ability to load Xenium nucleus segmentation masks

From 7b0d53dc4488b1e996072554681a9498271cb466 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 11:27:13 -0500
Subject: [PATCH 102/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7bdbe2abe..9e18a1d37 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9012
+Version: 5.1.0.9013
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 0d4fe4bffd118efceb4b546ededd0d46c8ee3e02 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:33:46 -0500
Subject: [PATCH 103/166] PR #9342

---
 vignettes/multimodal_vignette.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/multimodal_vignette.Rmd b/vignettes/multimodal_vignette.Rmd
index 7f9193618..ac0a8f944 100644
--- a/vignettes/multimodal_vignette.Rmd
+++ b/vignettes/multimodal_vignette.Rmd
@@ -129,7 +129,7 @@ DefaultAssay(cbmc) <- 'RNA'
 # Note that the following command is an alternative but returns the same result
 cbmc <- NormalizeData(cbmc, normalization.method = 'CLR', margin = 2, assay = 'ADT')
 
-# Now, we will visualize CD14 levels for RNA and protein
+# Now, we will visualize CD19 levels for RNA and protein
 # By setting the default assay, we can visualize one or the other
 DefaultAssay(cbmc) <- 'ADT'
 p1 <- FeaturePlot(cbmc, "CD19",cols = c("lightgrey","darkgreen")) + ggtitle("CD19 protein")

From 6437c171c562920710f273902b0086f26c47bc34 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:34:49 -0500
Subject: [PATCH 104/166] PR #9239

---
 R/visualization.R | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/visualization.R b/R/visualization.R
index d56389c06..fdacc7bda 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -1024,7 +1024,7 @@ DimPlot <- function(
 #'   \item \dQuote{all} (universal scaling): The plots for all features and
 #'     conditions are scaled to the maximum expression value for the feature
 #'     with the highest overall expression
-#'   \item \code{all} (no scaling): Each individual plot is scaled to the
+#'   \item \code{NULL} (no scaling): Each individual plot is scaled to the
 #'     maximum expression value of the feature in the condition provided to
 #'     \code{split.by}. Be aware setting \code{NULL} will result in color
 #'     scales that are not comparable between plots
@@ -3633,9 +3633,9 @@ ISpatialDimPlot <- function(
           coords.hover <- hovered[1, colnames(coords)[1:2]] / scale.factor
           group.hover <- hovered[1, group.by]
           sprintf(
-            "Cell: %s, Group: %s, Coordinates: (%.2f, %.2f)", 
-            cell.hover, 
-            group.hover, 
+            "Cell: %s, Group: %s, Coordinates: (%.2f, %.2f)",
+            cell.hover,
+            group.hover,
             coords.hover[[1]],
             coords.hover[[2]]
           )
@@ -3867,7 +3867,7 @@ ISpatialFeaturePlot <- function(
 #' default, ggplot2 assigns colors
 #' @param image.alpha Adjust the opacity of the background images. Set to 0 to
 #' remove.
-#' @param image.scale Choose the scale factor ("lowres"/"hires") to apply in 
+#' @param image.scale Choose the scale factor ("lowres"/"hires") to apply in
 #' order to matchthe plot with the specified `image` - defaults to "lowres"
 #' @param crop Crop the plot in to focus on points plotted. Set to \code{FALSE} to show
 #' entire background image.
@@ -3911,8 +3911,8 @@ ISpatialFeaturePlot <- function(
 #' @param alpha Controls opacity of spots. Provide as a vector specifying the
 #' min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single
 #' alpha value for each plot.
-#' @param shape Control the shape of the spots - same as the ggplot2 parameter. 
-#' The default is 21, which plots circles - use 22 to plot squares. 
+#' @param shape Control the shape of the spots - same as the ggplot2 parameter.
+#' The default is 21, which plots circles - use 22 to plot squares.
 #' @param stroke Control the width of the border around the spots
 #' @param interactive Launch an interactive SpatialDimPlot or SpatialFeaturePlot
 #' session, see \code{\link{ISpatialDimPlot}} or
@@ -8979,7 +8979,7 @@ SingleRasterMap <- function(
 #' colors
 #' @param image.alpha Adjust the opacity of the background images. Set to 0 to
 #' remove.
-#' @param image.scale Choose the scale factor ("lowres"/"hires") to apply in 
+#' @param image.scale Choose the scale factor ("lowres"/"hires") to apply in
 #' order to matchthe plot with the specified `image` - defaults to "lowres"
 #' @param pt.alpha Adjust the opacity of the points if plotting a
 #' \code{SpatialDimPlot}
@@ -8987,8 +8987,8 @@ SingleRasterMap <- function(
 #' to show entire background image.
 #' @param pt.size.factor Sets the size of the points relative to spot.radius
 #' @param stroke Control the width of the border around the spots
-#' @param shape Control the shape of the spots - same as the ggplot2 parameter. 
-#' The default is 21, which plots cirlces - use 22 to plot squares. 
+#' @param shape Control the shape of the spots - same as the ggplot2 parameter.
+#' The default is 21, which plots cirlces - use 22 to plot squares.
 #' @param col.by Mapping variable for the point color
 #' @param alpha.by Mapping variable for the point alpha value
 #' @param cells.highlight A list of character or numeric vectors of cells to

From 9410cfb637f9af62e426e0bf66acec0b4faba0d9 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:36:24 -0500
Subject: [PATCH 105/166] #9155

---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 8d7b01eef..067e2c9a5 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -4036,7 +4036,7 @@ SubsetByBarcodeInflections <- function(object) {
 #'   \item \dQuote{\code{mean.var.plot}} (mvp): First, uses a function to
 #'     calculate average expression (mean.function) and dispersion
 #'     (dispersion.function) for each feature. Next, divides features into
-#'     \code{num.bin} (deafult 20) bins based on their average expression,
+#'     \code{num.bin} (default 20) bins based on their average expression,
 #'     and calculates z-scores for dispersion within each bin. The purpose of
 #'     this is to identify variable features while controlling for the
 #'     strong relationship between variability and average expression

From 0f3f14fd966b3c1c26b033c347fea902bcd38e5c Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:40:21 -0500
Subject: [PATCH 106/166] other typos

---
 R/preprocessing.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 067e2c9a5..6581d2e5a 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -32,7 +32,7 @@ globalVariables(
 #' @param object Seurat object
 #' @param barcode.column Column to use as proxy for barcodes ("nCount_RNA" by default)
 #' @param group.column Column to group by ("orig.ident" by default)
-#' @param threshold.high Ignore barcodes of rank above thisf threshold in inflection calculation
+#' @param threshold.high Ignore barcodes of rank above this threshold in inflection calculation
 #' @param threshold.low Ignore barcodes of rank below this threshold in inflection calculation
 #'
 #' @return Returns Seurat object with a new list in the `tools` slot, `CalculateBarcodeInflections` with values:
@@ -4061,7 +4061,7 @@ SubsetByBarcodeInflections <- function(object) {
 #'     x-axis (default)
 #'   \item \dQuote{\code{equal_frequency}}: each bin contains an equal number
 #'     of features (can increase statistical power to detect overdispersed
-#'     eatures at high expression values, at the cost of reduced resolution
+#'     features at high expression values, at the cost of reduced resolution
 #'     along the x-axis)
 #' }
 #' @param verbose show progress bar for calculations

From 2ed7a4817eb8d40d1e650e95c3f4716109fe8766 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:41:47 -0500
Subject: [PATCH 107/166] #9083

---
 R/integration.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/integration.R b/R/integration.R
index e9dd3c3ca..2264783f0 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -7484,7 +7484,7 @@ ProjectDimReduc <- function(query,
 #' @param bridge.query.assay Assay name for bridge used for query mapping. ATAC by default
 #' @param supervised.reduction Type of supervised dimensional reduction to be performed
 #' for integrating the bridge and query.
-#' #' Options are:
+#' Options are:
 #' \itemize{
 #'    \item{slsi: Perform supervised LSI as the dimensional reduction for
 #'    the bridge-query integration}
@@ -7810,7 +7810,7 @@ FindBridgeIntegrationAnchors <- function(
 #'
 #' This is a convenience wrapper function around the following three functions
 #' that are often run together when perform integration.
-#' #' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}},
+#' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}},
 #' \code{\link{IntegrateEmbeddings}}.
 #'
 #' @inheritParams FindIntegrationAnchors

From 5455a71ad231710d7055bd87dff87159c4f783e4 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:47:29 -0500
Subject: [PATCH 108/166] #8404

---
 vignettes/sctransform_vignette.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/sctransform_vignette.Rmd b/vignettes/sctransform_vignette.Rmd
index 8b9230432..a3854a241 100644
--- a/vignettes/sctransform_vignette.Rmd
+++ b/vignettes/sctransform_vignette.Rmd
@@ -34,7 +34,7 @@ knitr::opts_chunk$set(
 Biological heterogeneity in single-cell RNA-seq data is often confounded by technical factors including sequencing depth. The number of molecules detected in each cell can vary significantly between cells, even within the same celltype.
 Interpretation of scRNA-seq data requires effective pre-processing and normalization to remove this technical variability. 
 
-In [our manuscript](https://genomebiology-biomedcentral-com/articles/10.1186/s13059-021-02584-9) we introduce a modeling framework for the normalization and variance stabilization of molecular count data from scRNA-seq experiments. This procedure omits the need for heuristic steps including pseudocount addition or log-transformation and improves common downstream analytical tasks such as variable gene selection, dimensional reduction, and differential expression. We named this method `sctransform`.
+In [our manuscript](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02584-9) we introduce a modeling framework for the normalization and variance stabilization of molecular count data from scRNA-seq experiments. This procedure omits the need for heuristic steps including pseudocount addition or log-transformation and improves common downstream analytical tasks such as variable gene selection, dimensional reduction, and differential expression. We named this method `sctransform`.
 
 Inspired by important and rigorous work from [Lause et al](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02451-7), we released an [updated manuscript](https://link.springer.com/article/10.1186/s13059-021-02584-9) and updated the sctransform software to a v2 version, which is now the default in Seurat v5.
 

From 94a9f79d5a075dfbcf5da7cc860999e50527deca Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:49:39 -0500
Subject: [PATCH 109/166] #8252

---
 R/visualization.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/visualization.R b/R/visualization.R
index fdacc7bda..73496c77e 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -581,7 +581,7 @@ RidgePlot <- function(
 #' @param pt.size Point size for points
 #' @param alpha Alpha value for points
 #' @param split.by A factor in object metadata to split the plot by, pass 'ident'
-#'  to split by cell identity'
+#' to split by cell identity
 #' @param split.plot  plot each group of the split violin plots by multiple or
 #' single violin shapes.
 #' @param adjust Adjust parameter for geom_violin
@@ -798,7 +798,7 @@ ColorDimSplit <- function(
 #' @param group.by Name of one or more metadata columns to group (color) cells by
 #' (for example, orig.ident); pass 'ident' to group by identity class
 #' @param split.by A factor in object metadata to split the plot by, pass 'ident'
-#'  to split by cell identity'
+#' to split by cell identity
 #' @param shape.by If NULL, all points are circles (default). You can specify any
 #' cell attribute (that can be pulled with FetchData) allowing for both
 #' different colors and different shapes on cells.  Only applicable if \code{raster = FALSE}.
@@ -1015,7 +1015,7 @@ DimPlot <- function(
 #' @param min.cutoff,max.cutoff Vector of minimum and maximum cutoff values for each feature,
 #'  may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')
 #' @param split.by A factor in object metadata to split the plot by, pass 'ident'
-#'  to split by cell identity'
+#' to split by cell identity
 #' @param keep.scale How to handle the color scale across multiple plots. Options are:
 #' \itemize{
 #'   \item \dQuote{feature} (default; by row/feature scaling): The plots for
@@ -1970,7 +1970,7 @@ CellScatter <- function(
 #' @param pt.size Size of the points on the plot
 #' @param shape.by Ignored for now
 #' @param split.by A factor in object metadata to split the feature plot by, pass 'ident'
-#'  to split by cell identity'
+#' to split by cell identity
 #' @param span Spline span in loess function call, if \code{NULL}, no spline added
 #' @param smooth Smooth the graph (similar to smoothScatter)
 #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data'
@@ -4374,7 +4374,7 @@ BarcodeInflectionsPlot <- function(object) {
 #' @param idents Identity classes to include in plot (default is all)
 #' @param group.by Factor to group the cells by
 #' @param split.by A factor in object metadata to split the plot by, pass 'ident'
-#'  to split by cell identity'
+#' to split by cell identity
 #' see \code{\link{FetchData}} for more details
 #' @param cluster.idents Whether to order identities by hierarchical clusters
 #' based on given features, default is FALSE

From 8685fbe50c0b201dd821e3a9db613093983996be Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:50:30 -0500
Subject: [PATCH 110/166] #8209

---
 vignettes/integration_mapping.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/integration_mapping.Rmd b/vignettes/integration_mapping.Rmd
index 9702c213c..b53d852a3 100644
--- a/vignettes/integration_mapping.Rmd
+++ b/vignettes/integration_mapping.Rmd
@@ -69,7 +69,7 @@ pancreas.ref <- RunPCA(pancreas.ref)
 pancreas.ref <- FindNeighbors(pancreas.ref, dims=1:30)
 pancreas.ref <- FindClusters(pancreas.ref)
 pancreas.ref <- RunUMAP(pancreas.ref, dims = 1:30)
-DimPlot(pancreas.ref,group.by = c("celltytpe","tech"))
+DimPlot(pancreas.ref,group.by = c("celltype","tech"))
 ```
 
 Next, we integrate the datasets into a shared reference. Please see our [introduction to integration vignette](https://satijalab.org/seurat/articles/integration_introduction)

From 194427fee68c293d628761efe5eaf8ae192607be Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:51:25 -0500
Subject: [PATCH 111/166] #7298

---
 vignettes/pbmc3k_tutorial.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/pbmc3k_tutorial.Rmd b/vignettes/pbmc3k_tutorial.Rmd
index 777303216..99581c1cf 100644
--- a/vignettes/pbmc3k_tutorial.Rmd
+++ b/vignettes/pbmc3k_tutorial.Rmd
@@ -32,7 +32,7 @@ knitr::opts_chunk$set(
 
 # Setup the Seurat Object
 
-For this tutorial, we will be analyzing the a dataset of Peripheral Blood Mononuclear Cells (PBMC) freely available from 10X Genomics. There are 2,700 single cells that were sequenced on the Illumina NextSeq 500. The raw data can be found [here](https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz).
+For this tutorial, we will be analyzing a dataset of Peripheral Blood Mononuclear Cells (PBMC) freely available from 10X Genomics. There are 2,700 single cells that were sequenced on the Illumina NextSeq 500. The raw data can be found [here](https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz).
 
 We start by reading in the data. The `Read10X()` function reads in the output of the [cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) pipeline from 10X, returning a unique molecular identified (UMI) count matrix. The values in this matrix represent the number of molecules for each feature (i.e. gene; row) that are detected in each cell (column). Note that more recent versions of cellranger now also output using the [h5 file format](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices), which can be read in using the `Read10X_h5()` function in Seurat.
 

From 5d7b6d2310f27d45ed20a3e6a5079978099ca578 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 09:53:45 -0500
Subject: [PATCH 112/166] #6244

---
 R/objects.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/objects.R b/R/objects.R
index b01df7344..87177b0ae 100644
--- a/R/objects.R
+++ b/R/objects.R
@@ -330,10 +330,10 @@ VisiumV1 <- setClass(
 
 #' The VisiumV2 class
 #'
-#' The VisiumV2 class represents spatial information from the 10X Genomics 
-#' Visium HD platform - it can also accomodate data from the standard 
+#' The VisiumV2 class represents spatial information from the 10X Genomics
+#' Visium HD platform - it can also accomodate data from the standard
 #' Visium platform
-#' 
+#'
 #' @slot image A three-dimensional array with PNG image data, see
 #' \code{\link[png]{readPNG}} for more details
 #' @slot scale.factors An object of class \code{\link{scalefactors}}; see
@@ -1176,7 +1176,7 @@ as.Seurat.SingleCellExperiment <- function(
   CheckDots(...)
   if (!PackageCheck('SingleCellExperiment', error = FALSE)) {
     stop(
-      "Please install SingleCellExperiment from Bioconductor before converting to a SingeCellExperiment object.",
+      "Please install SingleCellExperiment from Bioconductor before converting to a SingleCellExperiment object.",
       "\nhttps://bioconductor.org/packages/SingleCellExperiment/",
       call. = FALSE
     )
@@ -1319,7 +1319,7 @@ as.Seurat.SingleCellExperiment <- function(
 as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) {
   CheckDots(...)
   if (!PackageCheck('SingleCellExperiment', error = FALSE)) {
-    stop("Please install SingleCellExperiment from Bioconductor before converting to a SingeCellExperiment object")
+    stop("Please install SingleCellExperiment from Bioconductor before converting to a SingleCellExperiment object")
   }
   assay <- assay %||% Assays(object = x)
   if (!all(assay %in% Assays(object = x))) {
@@ -1603,7 +1603,7 @@ GetImage.VisiumV1 <- function(
   ...
 ) {
   mode <- match.arg(arg = mode)
-  
+
   image <- slot(object = object, name = 'image')
 
   image <- switch(
@@ -1793,8 +1793,8 @@ Radius.STARmap <- function(object, ...) {
 }
 
 #'
-#' @param scale A factor to scale the radius by; one of: "hires", 
-#' "lowres", or \code{NULL} for the unscaled value.  
+#' @param scale A factor to scale the radius by; one of: "hires",
+#' "lowres", or \code{NULL} for the unscaled value.
 #'
 #' @rdname Radius
 #' @concept objects

From 132c4c22739a7708a7ef2e45afb45aeb081a29fc Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:41:14 -0500
Subject: [PATCH 113/166] more typo

---
 R/dimensional_reduction.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R
index dd1b9f4bb..ce9967ca9 100644
--- a/R/dimensional_reduction.R
+++ b/R/dimensional_reduction.R
@@ -10,7 +10,7 @@ NULL
 #'
 #' Randomly permutes a subset of data, and calculates projected PCA scores for
 #' these 'random' genes. Then compares the PCA scores for the 'random' genes
-#' with the observed PCA scores to determine statistical signifance. End result
+#' with the observed PCA scores to determine statistical significance. End result
 #' is a p-value for each gene's association with each principal component.
 #'
 #' @param object Seurat object
@@ -276,7 +276,7 @@ PCASigGenes <- function(
 #' data("pbmc_small")
 #' pbmc_small
 #' pbmc_small <- ProjectDim(object = pbmc_small, reduction = "pca")
-#' # Vizualize top projected genes in heatmap
+#' # Visualize top projected genes in heatmap
 #' DimHeatmap(object = pbmc_small, reduction = "pca", dims = 1, balanced = TRUE)
 #'
 ProjectDim <- function(
@@ -1693,8 +1693,8 @@ RunUMAP.Neighbor <- function(
 #' be selected based on the size of the input dataset (200 for large datasets, 500 for small).
 #' @param learning.rate The initial learning rate for the embedding optimization.
 #' @param min.dist This controls how tightly the embedding is allowed compress points together.
-#' Larger values ensure embedded points are moreevenly distributed, while smaller values allow the
-#' algorithm to optimise more accurately with regard to local structure. Sensible values are in
+#' Larger values ensure embedded points are more evenly distributed, while smaller values allow the
+#' algorithm to optimize more accurately with regard to local structure. Sensible values are in
 #' the range 0.001 to 0.5.
 #' @param spread The effective scale of embedded points. In combination with min.dist this
 #' determines how clustered/clumped the embedded points are.
@@ -1721,7 +1721,7 @@ RunUMAP.Neighbor <- function(
 #' @param uwot.sgd Set \code{uwot::umap(fast_sgd = TRUE)}; see \code{\link[uwot]{umap}} for more details
 #' @param metric.kwds A dictionary of arguments to pass on to the metric, such as the p value for
 #' Minkowski distance. If NULL then no arguments are passed on.
-#' @param angular.rp.forest Whether to use an angular random projection forest to initialise the
+#' @param angular.rp.forest Whether to use an angular random projection forest to initialize the
 #' approximate nearest neighbor search. This can be faster, but is mostly on useful for metric that
 #' use an angular style distance such as cosine, correlation etc. In the case of those metrics
 #' angular forests will be chosen automatically.

From 59ae06c71d59f95b40577898f85248d85acff331 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:48:13 -0500
Subject: [PATCH 114/166] typo

---
 R/clustering.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index 902034510..6af115589 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -190,7 +190,7 @@ FindSubCluster <- function(
 #' embeddings from dimensional reductions.
 #'
 #' @param object The object used to calculate knn
-#' @param nn.idx k near neighbour indices. A cells x k matrix.
+#' @param nn.idx k near neighbor indices. A cells x k matrix.
 #' @param assay Assay used for prediction
 #' @param reduction Cell embedding of the reduction used for prediction
 #' @param dims Number of dimensions of cell embedding
@@ -522,7 +522,7 @@ FindClusters.Seurat <- function(
 #' cosine, manhattan, and hamming
 #' @param n.trees More trees gives higher precision when using annoy approximate
 #' nearest neighbor search
-#' @param nn.eps Error bound when performing nearest neighbor seach using RANN;
+#' @param nn.eps Error bound when performing nearest neighbor search using RANN;
 #' default of 0.0 implies exact nearest neighbor search
 #' @param verbose Whether or not to print output to the console
 #' @param l2.norm Take L2Norm of the data
@@ -868,7 +868,7 @@ FindNeighbors.Seurat <- function(
 # @param n.trees More trees gives higher precision when querying
 # @param k Number of neighbors
 # @param search.k During the query it will inspect up to search_k nodes which
-# gives you a run-time tradeoff between better accuracy and speed.
+# gives you a run-time trade off between better accuracy and speed.
 # @param include.distance Include the corresponding distances
 # @param index optional index object, will be recomputed if not provided
 #
@@ -928,7 +928,7 @@ AnnoyBuildIndex <- function(data, metric = "euclidean", n.trees = 50) {
 # @param query A set of data to be queried against the index
 # @param k Number of neighbors
 # @param search.k During the query it will inspect up to search_k nodes which
-# gives you a run-time tradeoff between better accuracy and speed.
+# gives you a run-time trade off between better accuracy and speed.
 # @param include.distance Include the corresponding distances in the result
 #
 # @return A list with 'nn.idx' (for each element in 'query', the index of the

From acc59da66b6329e4847a6d42f307c68507af2444 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:49:25 -0500
Subject: [PATCH 115/166] roxygenize

---
 man/CalculateBarcodeInflections.Rd | 2 +-
 man/ColorDimSplit.Rd               | 2 +-
 man/DimPlot.Rd                     | 2 +-
 man/DotPlot.Rd                     | 2 +-
 man/FastRPCAIntegration.Rd         | 2 +-
 man/FeaturePlot.Rd                 | 4 ++--
 man/FeatureScatter.Rd              | 2 +-
 man/FindNeighbors.Rd               | 2 +-
 man/FindVariableFeatures.Rd        | 4 ++--
 man/ISpatialDimPlot.Rd             | 2 +-
 man/ISpatialFeaturePlot.Rd         | 2 +-
 man/ImageDimPlot.Rd                | 2 +-
 man/ImageFeaturePlot.Rd            | 2 +-
 man/JackStraw.Rd                   | 2 +-
 man/LinkedPlots.Rd                 | 2 +-
 man/PredictAssay.Rd                | 2 +-
 man/PrepareBridgeReference.Rd      | 2 +-
 man/ProjectDim.Rd                  | 2 +-
 man/Radius.Rd                      | 2 +-
 man/RunUMAP.Rd                     | 6 +++---
 man/SingleSpatialPlot.Rd           | 4 ++--
 man/SpatialPlot.Rd                 | 4 ++--
 man/VisiumV2-class.Rd              | 4 ++--
 man/VlnPlot.Rd                     | 2 +-
 24 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/man/CalculateBarcodeInflections.Rd b/man/CalculateBarcodeInflections.Rd
index c5daa5caa..438a22005 100644
--- a/man/CalculateBarcodeInflections.Rd
+++ b/man/CalculateBarcodeInflections.Rd
@@ -21,7 +21,7 @@ CalculateBarcodeInflections(
 
 \item{threshold.low}{Ignore barcodes of rank below this threshold in inflection calculation}
 
-\item{threshold.high}{Ignore barcodes of rank above thisf threshold in inflection calculation}
+\item{threshold.high}{Ignore barcodes of rank above this threshold in inflection calculation}
 }
 \value{
 Returns Seurat object with a new list in the `tools` slot, `CalculateBarcodeInflections` with values:
diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd
index 33efebabd..bfb9202d2 100644
--- a/man/ColorDimSplit.Rd
+++ b/man/ColorDimSplit.Rd
@@ -38,7 +38,7 @@ See \code{\link{DiscretePalette}} for details.}
     \item{\code{group.by}}{Name of one or more metadata columns to group (color) cells by
 (for example, orig.ident); pass 'ident' to group by identity class}
     \item{\code{split.by}}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
     \item{\code{shape.by}}{If NULL, all points are circles (default). You can specify any
 cell attribute (that can be pulled with FetchData) allowing for both
 different colors and different shapes on cells.  Only applicable if \code{raster = FALSE}.}
diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd
index a46a402f0..aa2212b94 100644
--- a/man/DimPlot.Rd
+++ b/man/DimPlot.Rd
@@ -63,7 +63,7 @@ See \code{\link{DiscretePalette}} for details.}
 (for example, orig.ident); pass 'ident' to group by identity class}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{shape.by}{If NULL, all points are circles (default). You can specify any
 cell attribute (that can be pulled with FetchData) allowing for both
diff --git a/man/DotPlot.Rd b/man/DotPlot.Rd
index f59e9fa2e..511e86e2b 100644
--- a/man/DotPlot.Rd
+++ b/man/DotPlot.Rd
@@ -54,7 +54,7 @@ gene will have no dot drawn.}
 \item{group.by}{Factor to group the cells by}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
- to split by cell identity'
+to split by cell identity
 see \code{\link{FetchData}} for more details}
 
 \item{cluster.idents}{Whether to order identities by hierarchical clusters
diff --git a/man/FastRPCAIntegration.Rd b/man/FastRPCAIntegration.Rd
index 4c6d8e55b..b70aca2ab 100644
--- a/man/FastRPCAIntegration.Rd
+++ b/man/FastRPCAIntegration.Rd
@@ -64,7 +64,7 @@ Returns a Seurat object with integrated dimensional reduction
 \description{
 This is a convenience wrapper function around the following three functions
 that are often run together when perform integration.
-#' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}},
+\code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}},
 \code{\link{IntegrateEmbeddings}}.
 }
 \concept{integration}
diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd
index fa5c4293a..1ac0f4bf9 100644
--- a/man/FeaturePlot.Rd
+++ b/man/FeaturePlot.Rd
@@ -81,7 +81,7 @@ may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1',
 \item{reduction}{Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{keep.scale}{How to handle the color scale across multiple plots. Options are:
 \itemize{
@@ -91,7 +91,7 @@ to split by cell identity'}
   \item \dQuote{all} (universal scaling): The plots for all features and
     conditions are scaled to the maximum expression value for the feature
     with the highest overall expression
-  \item \code{all} (no scaling): Each individual plot is scaled to the
+  \item \code{NULL} (no scaling): Each individual plot is scaled to the
     maximum expression value of the feature in the condition provided to
     \code{split.by}. Be aware setting \code{NULL} will result in color
     scales that are not comparable between plots
diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd
index 5c1c6db47..5685d2ed1 100644
--- a/man/FeatureScatter.Rd
+++ b/man/FeatureScatter.Rd
@@ -48,7 +48,7 @@ useful for crowded plots if points of interest are being buried. (default is FAL
 (for example, orig.ident); pass 'ident' to group by identity class}
 
 \item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{cols}{Colors to use for identity class plotting.}
 
diff --git a/man/FindNeighbors.Rd b/man/FindNeighbors.Rd
index b6c520bca..0c4b8c703 100644
--- a/man/FindNeighbors.Rd
+++ b/man/FindNeighbors.Rd
@@ -118,7 +118,7 @@ nearest neighbor search}
 \item{annoy.metric}{Distance metric for annoy. Options include: euclidean,
 cosine, manhattan, and hamming}
 
-\item{nn.eps}{Error bound when performing nearest neighbor seach using RANN;
+\item{nn.eps}{Error bound when performing nearest neighbor search using RANN;
 default of 0.0 implies exact nearest neighbor search}
 
 \item{verbose}{Whether or not to print output to the console}
diff --git a/man/FindVariableFeatures.Rd b/man/FindVariableFeatures.Rd
index e4a3deac9..dc1bf5efa 100644
--- a/man/FindVariableFeatures.Rd
+++ b/man/FindVariableFeatures.Rd
@@ -75,7 +75,7 @@ FindVariableFeatures(object, ...)
   \item \dQuote{\code{mean.var.plot}} (mvp): First, uses a function to
     calculate average expression (mean.function) and dispersion
     (dispersion.function) for each feature. Next, divides features into
-    \code{num.bin} (deafult 20) bins based on their average expression,
+    \code{num.bin} (default 20) bins based on their average expression,
     and calculates z-scores for dispersion within each bin. The purpose of
     this is to identify variable features while controlling for the
     strong relationship between variability and average expression
@@ -106,7 +106,7 @@ methods are:
     x-axis (default)
   \item \dQuote{\code{equal_frequency}}: each bin contains an equal number
     of features (can increase statistical power to detect overdispersed
-    eatures at high expression values, at the cost of reduced resolution
+    features at high expression values, at the cost of reduced resolution
     along the x-axis)
 }}
 
diff --git a/man/ISpatialDimPlot.Rd b/man/ISpatialDimPlot.Rd
index ebe615fe8..e486fa164 100644
--- a/man/ISpatialDimPlot.Rd
+++ b/man/ISpatialDimPlot.Rd
@@ -17,7 +17,7 @@ ISpatialDimPlot(
 
 \item{image}{Name of the image to use in the plot}
 
-\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in 
+\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in
 order to matchthe plot with the specified `image` - defaults to "lowres"}
 
 \item{group.by}{Name of meta.data column to group the data by}
diff --git a/man/ISpatialFeaturePlot.Rd b/man/ISpatialFeaturePlot.Rd
index e4bd477ba..2b70af290 100644
--- a/man/ISpatialFeaturePlot.Rd
+++ b/man/ISpatialFeaturePlot.Rd
@@ -20,7 +20,7 @@ ISpatialFeaturePlot(
 
 \item{image}{Name of the image to use in the plot}
 
-\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in 
+\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in
 order to matchthe plot with the specified `image` - defaults to "lowres"}
 
 \item{slot}{If plotting a feature, which data slot to pull from (counts,
diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd
index 43fe764d6..ad01b5618 100644
--- a/man/ImageDimPlot.Rd
+++ b/man/ImageDimPlot.Rd
@@ -46,7 +46,7 @@ segmentation boundaries}
 (for example, orig.ident); pass 'ident' to group by identity class}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{cols}{Vector of colors, each color corresponds to an identity class. This may also be a single character
 or numeric value corresponding to a palette as specified by \code{\link[RColorBrewer]{brewer.pal.info}}.
diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd
index 95dc7fe69..7d6e00a15 100644
--- a/man/ImageFeaturePlot.Rd
+++ b/man/ImageFeaturePlot.Rd
@@ -73,7 +73,7 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors:
 may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{molecules}{A vector of molecules to plot}
 
diff --git a/man/JackStraw.Rd b/man/JackStraw.Rd
index 3201b443a..13a90219e 100644
--- a/man/JackStraw.Rd
+++ b/man/JackStraw.Rd
@@ -43,7 +43,7 @@ represents p-values for all genes.
 \description{
 Randomly permutes a subset of data, and calculates projected PCA scores for
 these 'random' genes. Then compares the PCA scores for the 'random' genes
-with the observed PCA scores to determine statistical signifance. End result
+with the observed PCA scores to determine statistical significance. End result
 is a p-value for each gene's association with each principal component.
 }
 \examples{
diff --git a/man/LinkedPlots.Rd b/man/LinkedPlots.Rd
index 6ed68241f..91a09e91b 100644
--- a/man/LinkedPlots.Rd
+++ b/man/LinkedPlots.Rd
@@ -40,7 +40,7 @@ LinkedFeaturePlot(
 
 \item{image}{Name of the image to use in the plot}
 
-\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in 
+\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in
 order to matchthe plot with the specified `image` - defaults to "lowres"}
 
 \item{group.by}{Name of meta.data column to group the data by}
diff --git a/man/PredictAssay.Rd b/man/PredictAssay.Rd
index e0e4c2803..f99e4f2de 100644
--- a/man/PredictAssay.Rd
+++ b/man/PredictAssay.Rd
@@ -21,7 +21,7 @@ PredictAssay(
 \arguments{
 \item{object}{The object used to calculate knn}
 
-\item{nn.idx}{k near neighbour indices. A cells x k matrix.}
+\item{nn.idx}{k near neighbor indices. A cells x k matrix.}
 
 \item{assay}{Assay used for prediction}
 
diff --git a/man/PrepareBridgeReference.Rd b/man/PrepareBridgeReference.Rd
index 46f2c1a54..455121d36 100644
--- a/man/PrepareBridgeReference.Rd
+++ b/man/PrepareBridgeReference.Rd
@@ -42,7 +42,7 @@ or SCT}
 
 \item{supervised.reduction}{Type of supervised dimensional reduction to be performed
 for integrating the bridge and query.
-#' Options are:
+Options are:
 \itemize{
    \item{slsi: Perform supervised LSI as the dimensional reduction for
    the bridge-query integration}
diff --git a/man/ProjectDim.Rd b/man/ProjectDim.Rd
index 6a6854467..7c4ab8a1d 100644
--- a/man/ProjectDim.Rd
+++ b/man/ProjectDim.Rd
@@ -46,7 +46,7 @@ all genes.
 data("pbmc_small")
 pbmc_small
 pbmc_small <- ProjectDim(object = pbmc_small, reduction = "pca")
-# Vizualize top projected genes in heatmap
+# Visualize top projected genes in heatmap
 DimHeatmap(object = pbmc_small, reduction = "pca", dims = 1, balanced = TRUE)
 
 }
diff --git a/man/Radius.Rd b/man/Radius.Rd
index 31597ca51..838cd1021 100644
--- a/man/Radius.Rd
+++ b/man/Radius.Rd
@@ -20,7 +20,7 @@
 
 \item{...}{Arguments passed to other methods}
 
-\item{scale}{A factor to scale the radius by; one of: "hires", 
+\item{scale}{A factor to scale the radius by; one of: "hires",
 "lowres", or \code{NULL} for the unscaled value.}
 }
 \description{
diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd
index fba7ec869..7850abb4e 100644
--- a/man/RunUMAP.Rd
+++ b/man/RunUMAP.Rd
@@ -147,8 +147,8 @@ be selected based on the size of the input dataset (200 for large datasets, 500
 \item{learning.rate}{The initial learning rate for the embedding optimization.}
 
 \item{min.dist}{This controls how tightly the embedding is allowed compress points together.
-Larger values ensure embedded points are moreevenly distributed, while smaller values allow the
-algorithm to optimise more accurately with regard to local structure. Sensible values are in
+Larger values ensure embedded points are more evenly distributed, while smaller values allow the
+algorithm to optimize more accurately with regard to local structure. Sensible values are in
 the range 0.001 to 0.5.}
 
 \item{spread}{The effective scale of embedded points. In combination with min.dist this
@@ -188,7 +188,7 @@ NULL will not set a seed}
 \item{metric.kwds}{A dictionary of arguments to pass on to the metric, such as the p value for
 Minkowski distance. If NULL then no arguments are passed on.}
 
-\item{angular.rp.forest}{Whether to use an angular random projection forest to initialise the
+\item{angular.rp.forest}{Whether to use an angular random projection forest to initialize the
 approximate nearest neighbor search. This can be faster, but is mostly on useful for metric that
 use an angular style distance such as cosine, correlation etc. In the case of those metrics
 angular forests will be chosen automatically.}
diff --git a/man/SingleSpatialPlot.Rd b/man/SingleSpatialPlot.Rd
index f199eac83..4cf8530e6 100644
--- a/man/SingleSpatialPlot.Rd
+++ b/man/SingleSpatialPlot.Rd
@@ -37,7 +37,7 @@ colors}
 \item{image.alpha}{Adjust the opacity of the background images. Set to 0 to
 remove.}
 
-\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in 
+\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in
 order to matchthe plot with the specified `image` - defaults to "lowres"}
 
 \item{pt.alpha}{Adjust the opacity of the points if plotting a
@@ -48,7 +48,7 @@ to show entire background image.}
 
 \item{pt.size.factor}{Sets the size of the points relative to spot.radius}
 
-\item{shape}{Control the shape of the spots - same as the ggplot2 parameter. 
+\item{shape}{Control the shape of the spots - same as the ggplot2 parameter.
 The default is 21, which plots cirlces - use 22 to plot squares.}
 
 \item{stroke}{Control the width of the border around the spots}
diff --git a/man/SpatialPlot.Rd b/man/SpatialPlot.Rd
index e58d5e56b..700683419 100644
--- a/man/SpatialPlot.Rd
+++ b/man/SpatialPlot.Rd
@@ -105,7 +105,7 @@ default, ggplot2 assigns colors}
 \item{image.alpha}{Adjust the opacity of the background images. Set to 0 to
 remove.}
 
-\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in 
+\item{image.scale}{Choose the scale factor ("lowres"/"hires") to apply in
 order to matchthe plot with the specified `image` - defaults to "lowres"}
 
 \item{crop}{Crop the plot in to focus on points plotted. Set to \code{FALSE} to show
@@ -166,7 +166,7 @@ themeing will not work when plotting multiple features/groupings}
 min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single
 alpha value for each plot.}
 
-\item{shape}{Control the shape of the spots - same as the ggplot2 parameter. 
+\item{shape}{Control the shape of the spots - same as the ggplot2 parameter.
 The default is 21, which plots circles - use 22 to plot squares.}
 
 \item{stroke}{Control the width of the border around the spots}
diff --git a/man/VisiumV2-class.Rd b/man/VisiumV2-class.Rd
index b2feaa6ea..f663b4ca9 100644
--- a/man/VisiumV2-class.Rd
+++ b/man/VisiumV2-class.Rd
@@ -6,8 +6,8 @@
 \alias{VisiumV2}
 \title{The VisiumV2 class}
 \description{
-The VisiumV2 class represents spatial information from the 10X Genomics 
-Visium HD platform - it can also accomodate data from the standard 
+The VisiumV2 class represents spatial information from the 10X Genomics
+Visium HD platform - it can also accomodate data from the standard
 Visium platform
 }
 \section{Slots}{
diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd
index 00be2eb10..d84009d50 100644
--- a/man/VlnPlot.Rd
+++ b/man/VlnPlot.Rd
@@ -53,7 +53,7 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas
 \item{group.by}{Group (color) cells in different ways (for example, orig.ident)}
 
 \item{split.by}{A factor in object metadata to split the plot by, pass 'ident'
-to split by cell identity'}
+to split by cell identity}
 
 \item{adjust}{Adjust parameter for geom_violin}
 

From dd745c4319813fefbcc21922821bd8da23ed46de Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 16:56:03 -0500
Subject: [PATCH 116/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9e18a1d37..9b4bab282 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9013
+Version: 5.1.0.9014
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From c6edcee63e1e7ba3d3cfaafd6f1bbffcb21c3dba Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Tue, 12 Dec 2023 16:49:14 -0500
Subject: [PATCH 117/166] add stroke size param

---
 R/visualization.R | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/R/visualization.R b/R/visualization.R
index 73496c77e..167179acd 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -875,6 +875,7 @@ DimPlot <- function(
   label.box = FALSE,
   repel = FALSE,
   alpha = 1,
+  stroke.size = NULL,
   cells.highlight = NULL,
   cols.highlight = '#DE2D26',
   sizes.highlight = 1,
@@ -941,6 +942,7 @@ DimPlot <- function(
         shape.by = shape.by,
         order = order,
         alpha = alpha,
+        stroke.size = stroke.size
         label = FALSE,
         cells.highlight = cells.highlight,
         cols.highlight = cols.highlight,
@@ -8224,6 +8226,7 @@ SingleDimPlot <- function(
   shape.by = NULL,
   alpha = 1,
   alpha.by = NULL,
+  stroke.size = NULL,
   order = NULL,
   label = FALSE,
   repel = FALSE,
@@ -8242,6 +8245,10 @@ SingleDimPlot <- function(
   raster <- raster %||% (nrow(x = data) > 1e5)
   pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster)
 
+  if (is.null(x = stroke.size)) {
+    stroke.size <- 0.600075815011372
+  }
+
   if (!is.null(x = cells.highlight) && pt.size != AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && isTRUE(x = raster)) {
     warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.")
   }
@@ -8352,7 +8359,8 @@ SingleDimPlot <- function(
         alpha = alpha.by
       ),
       size = pt.size,
-      alpha = alpha
+      alpha = alpha,
+      stroke = stroke.size
     )
   }
   plot <- plot +

From 4611c793219c4d21f94cab18939367aa13aa19ab Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Tue, 12 Dec 2023 16:50:56 -0500
Subject: [PATCH 118/166] add param description

---
 R/visualization.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/visualization.R b/R/visualization.R
index 167179acd..12ac3d7ec 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -814,6 +814,7 @@ ColorDimSplit <- function(
 #' @param label.box Whether to put a box around the label text (geom_text vs
 #' geom_label)
 #' @param alpha Alpha value for plotting (default is 1)
+#' @param stroke.size Adjust stroke (outline) size of points
 #' @param repel Repel labels
 #' @param cells.highlight A list of character or numeric vectors of cells to
 #' highlight. If only one group of cells desired, can simply

From d8013ae47214a5c2d84e0fb0c78dbc5f600203a0 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Tue, 12 Dec 2023 16:51:14 -0500
Subject: [PATCH 119/166] missing comma

---
 R/visualization.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/visualization.R b/R/visualization.R
index 12ac3d7ec..8a82fddb7 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -943,7 +943,7 @@ DimPlot <- function(
         shape.by = shape.by,
         order = order,
         alpha = alpha,
-        stroke.size = stroke.size
+        stroke.size = stroke.size,
         label = FALSE,
         cells.highlight = cells.highlight,
         cols.highlight = cols.highlight,

From e692606a9dca4add92bc2f7ce26412d19c323947 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Tue, 12 Dec 2023 16:52:27 -0500
Subject: [PATCH 120/166] add param description singledimplot

---
 R/visualization.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/visualization.R b/R/visualization.R
index 8a82fddb7..5950d2bdc 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -8185,6 +8185,7 @@ SingleCorPlot <- function(
 #' allowing for both different colors and different shapes on cells.
 #' @param alpha Alpha value for plotting (default is 1)
 #' @param alpha.by Mapping variable for the point alpha value
+#' @param stroke.size Adjust stroke (outline) size of points
 #' @param order Specify the order of plotting for the idents. This can be
 #' useful for crowded plots if points of interest are being buried. Provide
 #' either a full list of valid idents or a subset to be plotted last (on top).

From f2a0f58dbd7ad4640012d41d99a6aca34521cfe3 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:23:20 -0500
Subject: [PATCH 121/166] fix ordering of param warnings

---
 R/visualization.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/visualization.R b/R/visualization.R
index 5950d2bdc..64e5c9a52 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -814,8 +814,8 @@ ColorDimSplit <- function(
 #' @param label.box Whether to put a box around the label text (geom_text vs
 #' geom_label)
 #' @param alpha Alpha value for plotting (default is 1)
-#' @param stroke.size Adjust stroke (outline) size of points
 #' @param repel Repel labels
+#' @param stroke.size Adjust stroke (outline) size of points
 #' @param cells.highlight A list of character or numeric vectors of cells to
 #' highlight. If only one group of cells desired, can simply
 #' pass a vector instead of a list. If set, colors selected cells to the color(s)

From 20d9945d16ae7fa732d1ee1a116a013d3be2f854 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:23:45 -0500
Subject: [PATCH 122/166] replace bioRxiv URL flagged by check warnings with
 published URL

---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 6581d2e5a..b3dbae668 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -852,7 +852,7 @@ LoadCurioSeeker <- function(data.dir, assay = "Spatial") {
 #' @export
 #' @concept preprocessing
 #'
-#' @references \url{https://www.biorxiv.org/content/10.1101/387241v1}
+#' @references \url{https://doi.org/10.1038/s41592-019-0433-8}
 #'
 #' @examples
 #' \dontrun{

From 7b52de131dd197a1a55cd46cbce08a0c3f2b43d4 Mon Sep 17 00:00:00 2001
From: samuel-marsh <samuel.marsh@childrens.harvard.edu>
Date: Thu, 19 Dec 2024 10:36:26 -0500
Subject: [PATCH 123/166] roxygenize

---
 man/ColorDimSplit.Rd | 1 +
 man/DimPlot.Rd       | 3 +++
 man/MULTIseqDemux.Rd | 2 +-
 man/SingleDimPlot.Rd | 3 +++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd
index bfb9202d2..769594266 100644
--- a/man/ColorDimSplit.Rd
+++ b/man/ColorDimSplit.Rd
@@ -55,6 +55,7 @@ useful for crowded plots if points of interest are being buried. (default is FAL
 geom_label)}
     \item{\code{alpha}}{Alpha value for plotting (default is 1)}
     \item{\code{repel}}{Repel labels}
+    \item{\code{stroke.size}}{Adjust stroke (outline) size of points}
     \item{\code{cells.highlight}}{A list of character or numeric vectors of cells to
 highlight. If only one group of cells desired, can simply
 pass a vector instead of a list. If set, colors selected cells to the color(s)
diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd
index aa2212b94..88fdc6754 100644
--- a/man/DimPlot.Rd
+++ b/man/DimPlot.Rd
@@ -27,6 +27,7 @@ DimPlot(
   label.box = FALSE,
   repel = FALSE,
   alpha = 1,
+  stroke.size = NULL,
   cells.highlight = NULL,
   cols.highlight = "#DE2D26",
   sizes.highlight = 1,
@@ -91,6 +92,8 @@ geom_label)}
 
 \item{alpha}{Alpha value for plotting (default is 1)}
 
+\item{stroke.size}{Adjust stroke (outline) size of points}
+
 \item{cells.highlight}{A list of character or numeric vectors of cells to
 highlight. If only one group of cells desired, can simply
 pass a vector instead of a list. If set, colors selected cells to the color(s)
diff --git a/man/MULTIseqDemux.Rd b/man/MULTIseqDemux.Rd
index 4739af19e..d7255d008 100644
--- a/man/MULTIseqDemux.Rd
+++ b/man/MULTIseqDemux.Rd
@@ -42,6 +42,6 @@ object <- MULTIseqDemux(object)
 
 }
 \references{
-\url{https://www.biorxiv.org/content/10.1101/387241v1}
+\url{https://doi.org/10.1038/s41592-019-0433-8}
 }
 \concept{preprocessing}
diff --git a/man/SingleDimPlot.Rd b/man/SingleDimPlot.Rd
index 413bd721a..2c7f467cd 100644
--- a/man/SingleDimPlot.Rd
+++ b/man/SingleDimPlot.Rd
@@ -13,6 +13,7 @@ SingleDimPlot(
   shape.by = NULL,
   alpha = 1,
   alpha.by = NULL,
+  stroke.size = NULL,
   order = NULL,
   label = FALSE,
   repel = FALSE,
@@ -47,6 +48,8 @@ allowing for both different colors and different shapes on cells.}
 
 \item{alpha.by}{Mapping variable for the point alpha value}
 
+\item{stroke.size}{Adjust stroke (outline) size of points}
+
 \item{order}{Specify the order of plotting for the idents. This can be
 useful for crowded plots if points of interest are being buried. Provide
 either a full list of valid idents or a subset to be plotted last (on top).}

From 48e882bff90fd320e986274b1c788cb95b2bac0c Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 18:03:24 -0500
Subject: [PATCH 124/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9b4bab282..907f1ea1f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9014
+Version: 5.1.0.9015
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From b814066d2338a8c263cc32e03632231552e1ae85 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 18:05:06 -0500
Subject: [PATCH 125/166] Update changelog

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 55c05cf81..e81712cd5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))
 - Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))
 - Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
 - Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`

From bbb9ceef44037df7dde71bee95a11337d5be6e2b Mon Sep 17 00:00:00 2001
From: Izzy Grabski <ing@izzys-air.nygenome.org>
Date: Fri, 1 Nov 2024 11:17:58 -0400
Subject: [PATCH 126/166] only modify dims.to.integrate if it's incompatible
 with the reduction dimension

---
 R/integration.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/integration.R b/R/integration.R
index 2264783f0..46387076c 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -6319,8 +6319,8 @@ ValidateParams_IntegrateEmbeddings_IntegrationAnchors <- function(
       warning("Max dims.to.integrate is larger than the number of dimensions in ",
               "the provided reduction. Setting dims.to.integrate to 1:",
               ncol(x = reductions), " and continuing.", immediate. = TRUE, call. = FALSE)
+      ModifyParam(param = 'dims.to.integrate', value = 1:ncol(x = reductions))
     }
-    ModifyParam(param = 'dims.to.integrate', value = 1:ncol(x = reductions))
   }
   if (!is.null(x = weight.reduction)) {
     if (inherits(x = weight.reduction, what = "character")) {

From c9f8b5e9fec5c6d6e23ad021e9b9143195b605e9 Mon Sep 17 00:00:00 2001
From: Izzy Grabski <ing@izzys-air.nygenome.org>
Date: Fri, 1 Nov 2024 11:41:22 -0400
Subject: [PATCH 127/166] only use the reduction loadings specified by
 dims.to.integrate

---
 R/integration.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/integration.R b/R/integration.R
index 46387076c..71173dbeb 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -1694,7 +1694,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function(
         object = reference.integrated[[new.reduction.name.safe]]
       ))),
       assay = intdr.assay,
-      loadings = Loadings(object = reductions),
+      loadings = Loadings(object = reductions)[,dims.to.integrate],
       key = paste0(new.reduction.name.safe, "_")
     )
     DefaultAssay(object = reference.integrated) <- int.assay

From b132d0c64a9ea201da012ffd8114213797f11aba Mon Sep 17 00:00:00 2001
From: Izzy Grabski <ing@izzys-air.nygenome.org>
Date: Fri, 1 Nov 2024 11:52:30 -0400
Subject: [PATCH 128/166] otherwise dims.to.integrate will stay NULL if no
 value specified

---
 R/integration.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/integration.R b/R/integration.R
index 71173dbeb..39899bede 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -6321,6 +6321,7 @@ ValidateParams_IntegrateEmbeddings_IntegrationAnchors <- function(
               ncol(x = reductions), " and continuing.", immediate. = TRUE, call. = FALSE)
       ModifyParam(param = 'dims.to.integrate', value = 1:ncol(x = reductions))
     }
+    ModifyParam(param = 'dims.to.integrate', value = dims.to.integrate)
   }
   if (!is.null(x = weight.reduction)) {
     if (inherits(x = weight.reduction, what = "character")) {

From 81cb30c587fb9e8b2beb07016408f2083cfc78bd Mon Sep 17 00:00:00 2001
From: Izzy Grabski <ing@izzys-air.nygenome.org>
Date: Fri, 1 Nov 2024 12:12:04 -0400
Subject: [PATCH 129/166] adding tests to make sure dims.to.integrate is not
 overwritten

---
 tests/testthat/test_integration5.R | 51 ++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tests/testthat/test_integration5.R b/tests/testthat/test_integration5.R
index f4f635790..374f731ac 100644
--- a/tests/testthat/test_integration5.R
+++ b/tests/testthat/test_integration5.R
@@ -95,6 +95,23 @@ test_that("IntegrateLayers works with CCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
+  
+  integrated_sub <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = CCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:10
+    )
+  )
+  # check that dims.to.integrate is not being overwritten
+  expect_equal(ncol(integrated_sub[["integrated"]]), 10)
 })
 
 test_that("IntegrateLayers works with RPCAIntegration", {
@@ -127,6 +144,23 @@ test_that("IntegrateLayers works with RPCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
+  
+  integrated_sub <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = RPCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:10
+    )
+  )
+  # check that dims.to.integrate is not being overwritten
+  expect_equal(ncol(integrated_sub[["integrated"]]), 10)
 })
 
 test_that("IntegrateLayers works with JointPCAIntegration", {
@@ -159,6 +193,23 @@ test_that("IntegrateLayers works with JointPCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
+  
+  integrated_sub <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = JointPCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:10
+    )
+  )
+  # check that dims.to.integrate is not being overwritten
+  expect_equal(ncol(integrated_sub[["integrated"]]), 10)
 })
 
 test_that("IntegrateLayers fails when expected", {

From 0c7186d5216b359f1c03023d9574096429c8e4e7 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:09:28 -0500
Subject: [PATCH 130/166] Expand on IntegrateLayers dims.to.integrate tests

---
 tests/testthat/test_integration5.R | 69 ++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test_integration5.R b/tests/testthat/test_integration5.R
index 374f731ac..73552c198 100644
--- a/tests/testthat/test_integration5.R
+++ b/tests/testthat/test_integration5.R
@@ -95,7 +95,7 @@ test_that("IntegrateLayers works with CCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
-  
+
   integrated_sub <- suppressWarnings(
     IntegrateLayers(
       test.data.std,
@@ -110,8 +110,28 @@ test_that("IntegrateLayers works with CCAIntegration", {
       dims.to.integrate = 1:10
     )
   )
-  # check that dims.to.integrate is not being overwritten
+  # check that the integrated reduction has the specified number of
+  # `dims.to.integrate`
   expect_equal(ncol(integrated_sub[["integrated"]]), 10)
+
+  integrated_overflow <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = CCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:100
+    )
+  )
+  # check that the integrated reduction is the same as you'd get without
+  # `dims.to.integrate` (i.e. the same size as the initial reduction)
+  # NOTE(review): Seurat `Embeddings()` defaults to "pca", not "integrated"
+  expect_equal(Embeddings(integrated_overflow), Embeddings(integrated))
 })
 
 test_that("IntegrateLayers works with RPCAIntegration", {
@@ -144,7 +164,9 @@ test_that("IntegrateLayers works with RPCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
-  
+
+  # check that the integrated reduction has the specified number of
+  # `dims.to.integrate`
   integrated_sub <- suppressWarnings(
     IntegrateLayers(
       test.data.std,
@@ -161,6 +183,25 @@ test_that("IntegrateLayers works with RPCAIntegration", {
   )
   # check that dims.to.integrate is not being overwritten
   expect_equal(ncol(integrated_sub[["integrated"]]), 10)
+
+  integrated_overflow <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = RPCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:100
+    )
+  )
+  # check that the integrated reduction is the same as you'd get without
+  # `dims.to.integrate` (i.e. the same size as the initial reduction)
+  # NOTE(review): Seurat `Embeddings()` defaults to "pca", not "integrated"
+  expect_equal(Embeddings(integrated_overflow), Embeddings(integrated))
 })
 
 test_that("IntegrateLayers works with JointPCAIntegration", {
@@ -193,7 +234,8 @@ test_that("IntegrateLayers works with JointPCAIntegration", {
     Embeddings(integrated[["integrated"]])[75, 45],
     0.5442
   )
-  
+  # check that the integrated reduction has the specified number of
+  # `dims.to.integrate`
   integrated_sub <- suppressWarnings(
     IntegrateLayers(
       test.data.std,
@@ -210,6 +252,25 @@ test_that("IntegrateLayers works with JointPCAIntegration", {
   )
   # check that dims.to.integrate is not being overwritten
   expect_equal(ncol(integrated_sub[["integrated"]]), 10)
+
+  integrated_overflow <- suppressWarnings(
+    IntegrateLayers(
+      test.data.std,
+      method = JointPCAIntegration,
+      orig.reduction = "pca",
+      new.reduction = "integrated",
+      verbose = FALSE,
+      # since `k.weight` must be less than the number of samples in the
+      # smallest layer being integrated, it must be set to accommodate the
+      # small dataset used for testing
+      k.weight = 10,
+      dims.to.integrate = 1:100
+    )
+  )
+  # check that the integrated reduction is the same as you'd get without
+  # `dims.to.integrate` (i.e. the same size as the initial reduction)
+  # NOTE(review): Seurat `Embeddings()` defaults to "pca", not "integrated"
+  expect_equal(Embeddings(integrated_overflow), Embeddings(integrated))
 })
 
 test_that("IntegrateLayers fails when expected", {

From d2a5c1dc4a16402328c0acef351b78c2e6e69eff Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:09:40 -0500
Subject: [PATCH 131/166] Ensure dims.to.integrate is updated if too large

---
 R/integration.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/integration.R b/R/integration.R
index 39899bede..8ef4d47d1 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -6319,7 +6319,7 @@ ValidateParams_IntegrateEmbeddings_IntegrationAnchors <- function(
       warning("Max dims.to.integrate is larger than the number of dimensions in ",
               "the provided reduction. Setting dims.to.integrate to 1:",
               ncol(x = reductions), " and continuing.", immediate. = TRUE, call. = FALSE)
-      ModifyParam(param = 'dims.to.integrate', value = 1:ncol(x = reductions))
+      dims.to.integrate <- 1:ncol(x = reductions)
     }
     ModifyParam(param = 'dims.to.integrate', value = dims.to.integrate)
   }

From 62894809e7a415e500e74e4daeebbbd23702ed2a Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:13:59 -0500
Subject: [PATCH 132/166] Update changelog

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index e81712cd5..106d27ce6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter.
 - Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))
 - Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))
 - Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))

From 1ae5263d430375e08b2696b949d7ab381c94b811 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:15:24 -0500
Subject: [PATCH 133/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 907f1ea1f..eab97d73a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9015
+Version: 5.1.0.9016
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 358cddca3afe87dd08111a4d1f4310802e939728 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 16 Dec 2024 23:24:42 -0500
Subject: [PATCH 134/166] Drop @importFrom purrr imap from Load10X_Spatial

---
 R/preprocessing.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index b3dbae668..1266b29bf 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -512,7 +512,6 @@ GetResidual <- function(
 #' @importFrom png readPNG
 #' @importFrom grid rasterGrob
 #' @importFrom jsonlite fromJSON
-#' @importFrom purrr imap
 #'
 #' @export
 #' @concept preprocessing

From 67e65df099a903a3662e80e719b5e43108e84fff Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 16 Dec 2024 23:25:07 -0500
Subject: [PATCH 135/166] Drop @importFrom grid rasterGrob from Load10X_Spatial

---
 R/preprocessing.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 1266b29bf..4a6e80512 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -510,7 +510,6 @@ GetResidual <- function(
 #' @return A \code{Seurat} object
 #'
 #' @importFrom png readPNG
-#' @importFrom grid rasterGrob
 #' @importFrom jsonlite fromJSON
 #'
 #' @export

From 2885752967675b97b37e55711de29ef60a6e94f3 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Mon, 16 Dec 2024 23:27:10 -0500
Subject: [PATCH 136/166] Add `image.type` param to Read10X_Image

---
 R/preprocessing.R | 50 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 4a6e80512..c4e46c4eb 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -1211,6 +1211,7 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) {
 #' @param slice Name for the image, used to populate the instance's key
 #' @param filter.matrix Filter spot/feature matrix to only include spots that
 #' have been determined to be over tissue
+#' @param image.type Image type to return, one of: "VisiumV1" or "VisiumV2"
 #'
 #' @return A \code{\link{VisiumV2}} object
 #'
@@ -1224,8 +1225,13 @@ Read10X_Image <- function(
   image.name = "tissue_lowres_image.png",
   assay = "Spatial",
   slice = "slice1",
-  filter.matrix = TRUE
+  filter.matrix = TRUE,
+  image.type = "VisiumV2"
 ) {
+  # Validate the `image.type` parameter.
+  image.type <- match.arg(image.type, choices = c("VisiumV1", "VisiumV2"))
+
+  # Read in the H&E stain image.
   image <- png::readPNG(
     source = file.path(
       image.dir,
@@ -1233,28 +1239,54 @@ Read10X_Image <- function(
     )
   )
 
-  # read in the scale factors
+  # Read in the scale factors.
   scale.factors <- Read10X_ScaleFactors(
     filename = file.path(image.dir, "scalefactors_json.json")
   )
 
-  # read in the tissue coordinates as a data.frame
+  # Read in the tissue coordinates as a data.frame.
   coordinates <- Read10X_Coordinates(
     filename = Sys.glob(file.path(image.dir, "*tissue_positions*")),
     filter.matrix
   )
-  # create an `sp` compatible `FOV` instance
+
+  # Use the `slice` value to populate a Seurat-style identifier for the image.
+  key <- Key(slice, quiet = TRUE)
+
+  # Return the specified `image.type`.
+  if (image.type == "VisiumV1") {
+    visium.v1 <- new(
+      Class = image.type,
+      assay = assay,
+      key = key,
+      coordinates = coordinates,
+      scale.factors = scale.factors,
+      image = image
+    )
+
+    # As of v5.1.0 `Radius.VisiumV1` no longer returns the value of the 
+    # `spot.radius` slot and instead calculates the value on the fly, but we 
+    # can populate the static slot in case it's depended on.
+    visium.v1@spot.radius <- Radius(visium.v1)
+
+    return(visium.v1)
+  }
+
+  # If `image.type` is not "VisiumV1" then it must be "VisiumV2".
+  stopifnot(image.type == "VisiumV2")
+
+  # Create an `sp` compatible `FOV` instance.
   fov <- CreateFOV(
     coordinates[, c("imagerow", "imagecol")],
     type = "centroids",
     radius = scale.factors[["spot"]],
     assay = assay,
-    key = Key(slice, quiet = TRUE)
+    key = key
   )
 
-  # build the final `VisiumV2` - essentially just adding `image` and
-  # `scale.factors` to the object
-  visium.fov <- new(
+  # Build the final `VisiumV2` instance, essentially just adding `image` and
+  # `scale.factors` to the `fov`.
+  visium.v2 <- new(
     Class = "VisiumV2",
     boundaries = fov@boundaries,
     molecules = fov@molecules,
@@ -1264,7 +1296,7 @@ Read10X_Image <- function(
     scale.factors = scale.factors
   )
 
-  return(visium.fov)
+  return(visium.v2)
 }
 
 #' Load 10X Genomics Visium Tissue Positions

From f966fc3147068ad1ce03fdfd418b3ba4f6618e30 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 17 Dec 2024 00:30:41 -0500
Subject: [PATCH 137/166] Add test case for image.type to test_load_10X.R

---
 tests/testthat/test_load_10X.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/testthat/test_load_10X.R b/tests/testthat/test_load_10X.R
index f4f7a50f5..91b7f0902 100644
--- a/tests/testthat/test_load_10X.R
+++ b/tests/testthat/test_load_10X.R
@@ -132,6 +132,31 @@ test_that("Read10X_Image works as expected", {
     )
     # the size of the two images should be different
     expect_false(all(dim(image.hires) == dim(image.lowres)))
+
+    # `VisiumV1` image
+    image.v1 <- Read10X_Image(
+      path.to.image,
+      image.name = "tissue_lowres_image.png",
+      image.type = "VisiumV1"
+    )
+    coordinates <- GetTissueCoordinates(image.v1, scale = "lowres")
+    spot.radius <- Radius(image.v1, scale = "lowres")
+    scale.factors <- ScaleFactors(image.v1)
+    # check that the scale factors were read in as expected
+    expect_true(identical(scale.factors, scale.factors.expected))
+    # check that `coordinates` contains values scaled for the low resolution PNG
+    # also make sure that it has the expected column names
+    coordinates.expected.v1 <- coordinates.expected
+    colnames(coordinates.expected.v1) <- c("imagerow", "imagecol")
+    expect_equal(
+      coordinates[, c("imagerow", "imagecol")] / scale.factors[["lowres"]],
+      coordinates.expected.v1
+    )
+    # check that the spot size is similarly scaled
+    expect_equal(
+      (spot.radius / scale.factors[["lowres"]] * max(dim(image.lowres))),
+      scale.factors.expected[["spot"]],
+    )
   }
 })
 

From f48c536b0bf04c7d359e90d137b14192c9426aa7 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:45:26 -0500
Subject: [PATCH 138/166] Add missing docstring for GetImage.VisiumV2

---
 R/objects.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/objects.R b/R/objects.R
index 87177b0ae..86b9ae482 100644
--- a/R/objects.R
+++ b/R/objects.R
@@ -1630,6 +1630,14 @@ GetImage.VisiumV1 <- function(
   return(image)
 }
 
+
+#'
+#' @rdname GetImage
+#' @concept objects
+#' @concept spatial
+#' @method GetImage VisiumV2
+#' @export
+#'
 GetImage.VisiumV2 <- GetImage.VisiumV1
 
 #' Get Tissue Coordinates

From 6f489aa87c0022e46da1b59364cf6c4dbf858de6 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:47:25 -0500
Subject: [PATCH 139/166] Update docs

---
 NAMESPACE            | 2 +-
 man/GetImage.Rd      | 3 +++
 man/Read10X_Image.Rd | 5 ++++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 910b3f493..b58c9cf45 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -49,6 +49,7 @@ S3method(GetAssay,Seurat)
 S3method(GetImage,STARmap)
 S3method(GetImage,SlideSeq)
 S3method(GetImage,VisiumV1)
+S3method(GetImage,VisiumV2)
 S3method(GetTissueCoordinates,STARmap)
 S3method(GetTissueCoordinates,SlideSeq)
 S3method(GetTissueCoordinates,VisiumV1)
@@ -796,7 +797,6 @@ importFrom(plotly,plot_ly)
 importFrom(plotly,raster2uri)
 importFrom(png,readPNG)
 importFrom(progressr,progressor)
-importFrom(purrr,imap)
 importFrom(reticulate,import)
 importFrom(reticulate,py_module_available)
 importFrom(reticulate,py_set_seed)
diff --git a/man/GetImage.Rd b/man/GetImage.Rd
index a0d134863..186bbc8de 100644
--- a/man/GetImage.Rd
+++ b/man/GetImage.Rd
@@ -4,6 +4,7 @@
 \alias{GetImage.SlideSeq}
 \alias{GetImage.STARmap}
 \alias{GetImage.VisiumV1}
+\alias{GetImage.VisiumV2}
 \title{Get Image Data}
 \usage{
 \method{GetImage}{SlideSeq}(object, mode = c("grob", "raster", "plotly", "raw"), ...)
@@ -11,6 +12,8 @@
 \method{GetImage}{STARmap}(object, mode = c("grob", "raster", "plotly", "raw"), ...)
 
 \method{GetImage}{VisiumV1}(object, mode = c("grob", "raster", "plotly", "raw"), ...)
+
+\method{GetImage}{VisiumV2}(object, mode = c("grob", "raster", "plotly", "raw"), ...)
 }
 \arguments{
 \item{object}{An object}
diff --git a/man/Read10X_Image.Rd b/man/Read10X_Image.Rd
index 2b9a81d06..d54b1e03d 100644
--- a/man/Read10X_Image.Rd
+++ b/man/Read10X_Image.Rd
@@ -9,7 +9,8 @@ Read10X_Image(
   image.name = "tissue_lowres_image.png",
   assay = "Spatial",
   slice = "slice1",
-  filter.matrix = TRUE
+  filter.matrix = TRUE,
+  image.type = "VisiumV2"
 )
 }
 \arguments{
@@ -25,6 +26,8 @@ should include files \code{tissue_lowres_image.png},
 
 \item{filter.matrix}{Filter spot/feature matrix to only include spots that
 have been determined to be over tissue}
+
+\item{image.type}{Image type to return, one of: "VisiumV1" or "VisiumV2"}
 }
 \value{
 A \code{\link{VisiumV2}} object

From fff9055d7b47d479c7e6caa51b5bd261ef82a6bd Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:42:24 -0500
Subject: [PATCH 140/166] Update changelog

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 106d27ce6..f385e8135 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # Unreleased
 
 ## Changes
+- Added `image.type` parameter to `Read10X_Image` enabling `VisiumV1` instances to be populated instead of instances of the default `VisiumV2` class ([#9556](https://github.com/satijalab/seurat/pull/9556))
 - Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter.
 - Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))
 - Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))

From 549044a64e177bbdc60584ea28cc47aaf8bb113e Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 23:42:45 -0500
Subject: [PATCH 141/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index eab97d73a..fc78733a3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9016
+Version: 5.1.0.9017
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 9616713a05aec655bbe39e544814b7b61922879b Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 00:08:14 -0500
Subject: [PATCH 142/166] Drop purrr from imports

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fc78733a3..2cdac3190 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -68,7 +68,6 @@ Imports:
     plotly (>= 4.9.0),
     png,
     progressr,
-    purrr,
     RANN,
     RColorBrewer,
     Rcpp (>= 1.0.7),

From d9881018e545508e6edc6bb56acfd0ed4001cc56 Mon Sep 17 00:00:00 2001
From: rharao <rha.rao@gmail.com>
Date: Thu, 12 Dec 2024 14:12:56 -0500
Subject: [PATCH 143/166] Implement group.by arg to FindAllMarkers

---
 R/differential_expression.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/R/differential_expression.R b/R/differential_expression.R
index 515491545..0e245cfd0 100644
--- a/R/differential_expression.R
+++ b/R/differential_expression.R
@@ -46,6 +46,7 @@ FindAllMarkers <- function(
   object,
   assay = NULL,
   features = NULL,
+  group.by = NULL,
   logfc.threshold = 0.1,
   test.use = 'wilcox',
   slot = 'data',
@@ -75,6 +76,9 @@ FindAllMarkers <- function(
     return.thresh <- 0.7
   }
   if (is.null(x = node)) {
+    if (!is.null(x = group.by)) {
+      Idents(object = object) <- group.by
+      }
     idents.all <- sort(x = unique(x = Idents(object = object)))
   } else {
     if (!PackageCheck('ape', error = FALSE)) {

From 28af73f63150e6d06e0f621e48046215e2f6806d Mon Sep 17 00:00:00 2001
From: rharao <rha.rao@gmail.com>
Date: Fri, 13 Dec 2024 10:12:02 -0500
Subject: [PATCH 144/166] Handle "ident"; guard against group.by not in
 metadata

---
 R/differential_expression.R | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/R/differential_expression.R b/R/differential_expression.R
index 0e245cfd0..8d67c8393 100644
--- a/R/differential_expression.R
+++ b/R/differential_expression.R
@@ -76,9 +76,12 @@ FindAllMarkers <- function(
     return.thresh <- 0.7
   }
   if (is.null(x = node)) {
-    if (!is.null(x = group.by)) {
-      Idents(object = object) <- group.by
+    if (!is.null(x = group.by) && group.by != "ident") {
+      if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) {
+        stop("'", group.by, "' not found in object metadata")
       }
+      Idents(object = object) <- group.by
+    }
     idents.all <- sort(x = unique(x = Idents(object = object)))
   } else {
     if (!PackageCheck('ape', error = FALSE)) {
@@ -900,7 +903,7 @@ FindMarkers.DimReduc <- function(
 #' use all other cells for comparison; if an object of class \code{phylo} or
 #' 'clustertree' is passed to \code{ident.1}, must pass a node to find markers for
 #' @param group.by Regroup cells into a different identity class prior to 
-#' performing differential expression (see example)
+#' performing differential expression (see example); \code{"ident"} to use Idents
 #' @param subset.ident Subset a particular identity class prior to regrouping. 
 #' Only relevant if group.by is set (see example)
 #' @param assay Assay to use in differential expression testing
@@ -923,10 +926,13 @@ FindMarkers.Seurat <- function(
   reduction = NULL,
   ...
 ) {
-  if (!is.null(x = group.by)) {
+  if (!is.null(x = group.by) && group.by != "ident") {
     if (!is.null(x = subset.ident)) {
       object <- subset(x = object, idents = subset.ident)
     }
+    if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) {
+      stop("'", group.by, "' not found in object metadata")
+    }
     Idents(object = object) <- group.by
   }
   if (!is.null(x = assay) && !is.null(x = reduction)) {

From e93d0f51db2c8804a862e76a8710a2cdfe9811f6 Mon Sep 17 00:00:00 2001
From: rharao <18608184+rharao@users.noreply.github.com>
Date: Fri, 13 Dec 2024 12:08:57 -0500
Subject: [PATCH 145/166] roxygenize

---
 man/FindAllMarkers.Rd | 4 ++++
 man/FindMarkers.Rd    | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 man/FindAllMarkers.Rd
 mode change 100644 => 100755 man/FindMarkers.Rd

diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd
old mode 100644
new mode 100755
index a2f9f764e..35183b632
--- a/man/FindAllMarkers.Rd
+++ b/man/FindAllMarkers.Rd
@@ -9,6 +9,7 @@ FindAllMarkers(
   object,
   assay = NULL,
   features = NULL,
+  group.by = NULL,
   logfc.threshold = 0.1,
   test.use = "wilcox",
   slot = "data",
@@ -37,6 +38,9 @@ FindAllMarkers(
 
 \item{features}{Genes to test. Default is to use all genes}
 
+\item{group.by}{Regroup cells into a different identity class prior to 
+performing differential expression (see example); \code{"ident"} to use Idents}
+
 \item{logfc.threshold}{Limit testing to genes which show, on average, at least
 X-fold difference (log-scale) between the two groups of cells. Default is 0.1
 Increasing logfc.threshold speeds up the function, but can miss weaker signals.
diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd
old mode 100644
new mode 100755
index 373d58190..4a437fc8f
--- a/man/FindMarkers.Rd
+++ b/man/FindMarkers.Rd
@@ -231,7 +231,7 @@ use all other cells for comparison; if an object of class \code{phylo} or
 'clustertree' is passed to \code{ident.1}, must pass a node to find markers for}
 
 \item{group.by}{Regroup cells into a different identity class prior to 
-performing differential expression (see example)}
+performing differential expression (see example); \code{"ident"} to use Idents}
 
 \item{subset.ident}{Subset a particular identity class prior to regrouping. 
 Only relevant if group.by is set (see example)}

From 0e9b6ffb19701482c27ea00d2711640dd433ccdb Mon Sep 17 00:00:00 2001
From: rharao <18608184+rharao@users.noreply.github.com>
Date: Fri, 13 Dec 2024 12:13:29 -0500
Subject: [PATCH 146/166] stray file mode change

---
 man/FindAllMarkers.Rd | 0
 man/FindMarkers.Rd    | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 man/FindAllMarkers.Rd
 mode change 100755 => 100644 man/FindMarkers.Rd

diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd
old mode 100755
new mode 100644
diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd
old mode 100755
new mode 100644

From 44e500bbfb670ac9f03fd880101f9f25163f0ff5 Mon Sep 17 00:00:00 2001
From: rharao <18608184+rharao@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:19:48 -0500
Subject: [PATCH 147/166] Ensure condition length 1

---
 R/differential_expression.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/differential_expression.R b/R/differential_expression.R
index 8d67c8393..486171e4f 100644
--- a/R/differential_expression.R
+++ b/R/differential_expression.R
@@ -76,7 +76,7 @@ FindAllMarkers <- function(
     return.thresh <- 0.7
   }
   if (is.null(x = node)) {
-    if (!is.null(x = group.by) && group.by != "ident") {
+    if (!is.null(x = group.by) && !identical(x = group.by, y = "ident")) {
       if (length(x = group.by) == 1 && ! group.by %in% colnames(x = object@meta.data)) {
         stop("'", group.by, "' not found in object metadata")
       }

From ffd4a20beec5d506eeba71dd41f9828bd1aab013 Mon Sep 17 00:00:00 2001
From: rharao <18608184+rharao@users.noreply.github.com>
Date: Mon, 16 Dec 2024 09:26:46 -0500
Subject: [PATCH 148/166] ensure condition length 1

---
 R/differential_expression.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/differential_expression.R b/R/differential_expression.R
index 486171e4f..778fb0431 100644
--- a/R/differential_expression.R
+++ b/R/differential_expression.R
@@ -926,7 +926,7 @@ FindMarkers.Seurat <- function(
   reduction = NULL,
   ...
 ) {
-  if (!is.null(x = group.by) && group.by != "ident") {
+  if (!is.null(x = group.by) && !identical(x = group.by, y = "ident")) {
     if (!is.null(x = subset.ident)) {
       object <- subset(x = object, idents = subset.ident)
     }

From 51654e54938019e952247ca890e96c97783ad668 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 17:38:22 -0500
Subject: [PATCH 149/166] Move FindAllMarkers calls inside test_that

---
 tests/testthat/test_differential_expression.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R
index f0c685670..ce21a6659 100644
--- a/tests/testthat/test_differential_expression.R
+++ b/tests/testthat/test_differential_expression.R
@@ -366,12 +366,13 @@ test_that("BPCells FindMarkers gives same results", {
 
 # Tests for FindAllMarkers
 # -------------------------------------------------------------------------------
-results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small,pseudocount.use=1)))
-results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use=1)))
-results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use=1, vst.flavor = "v1")))
-results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1)))
 
 test_that("FindAllMarkers works as expected", {
+  results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 1)))
+  results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use = 1)))
+  results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use = 1, vst.flavor = "v1")))
+  results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1)))
+
   expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene"))
   expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18)
   expect_equal(results[1, "avg_log2FC"], -6.030507, tolerance = 1e-6)

From 1ae260bcdda2056cc97d84f592a0ecdec6f36f01 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 17:59:49 -0500
Subject: [PATCH 150/166] Add test case for FindMarkers group.by

---
 tests/testthat/test_differential_expression.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R
index ce21a6659..060d485e1 100644
--- a/tests/testthat/test_differential_expression.R
+++ b/tests/testthat/test_differential_expression.R
@@ -368,10 +368,14 @@ test_that("BPCells FindMarkers gives same results", {
 # -------------------------------------------------------------------------------
 
 test_that("FindAllMarkers works as expected", {
+  pbmc_copy <- pbmc_small
+  Idents(pbmc_copy) <- "orig.ident"
+
   results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 1)))
   results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use = 1)))
   results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use = 1, vst.flavor = "v1")))
   results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1)))
+  results.gb <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_copy, pseudocount.use = 1, group.by = "RNA_snn_res.1")))
 
   expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene"))
   expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18)
@@ -408,6 +412,10 @@ test_that("FindAllMarkers works as expected", {
   expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15)
   expect_equal(nrow(x = results.pseudo), 222)
   expect_equal(rownames(results.pseudo)[1], "HLA-DPB1")
+
+  # Setting the `group.by` parameter is equivalent
+  # to setting the object's `Idents` before running `FindAllMarkers`.
+  expect_equal(results.gb, results)
 })
 
 

From f93380a057b36490f2101c3fc21058dcc544fb35 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Thu, 19 Dec 2024 18:16:37 -0500
Subject: [PATCH 151/166] Raise warning when `node` and `group.by` are both set

---
 R/differential_expression.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/differential_expression.R b/R/differential_expression.R
index 778fb0431..6e45e8cfe 100644
--- a/R/differential_expression.R
+++ b/R/differential_expression.R
@@ -87,6 +87,14 @@ FindAllMarkers <- function(
     if (!PackageCheck('ape', error = FALSE)) {
       stop(cluster.ape, call. = FALSE)
     }
+    if (!is.null(group.by)) {
+      warning(
+        paste0(
+          "The `group.by` parameter for `FindAllMarkers` ",
+          "is ignored when `node` is set."
+        )
+      )
+    }
     tree <- Tool(object = object, slot = 'BuildClusterTree')
     if (is.null(x = tree)) {
       stop("Please run 'BuildClusterTree' before finding markers on nodes")

From 8723dcc4d4c83fd50c41819df8875eaef0891c81 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 12:53:04 -0500
Subject: [PATCH 152/166] Update changelog

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index f385e8135..abfd8dbd8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,8 @@
 # Unreleased
 
 ## Changes
+- Added `group.by` parameter to `FindAllMarkers`, allowing users to regroup their data using a non-default identity class prior to performing differential expression ([#9550](https://github.com/satijalab/seurat/pull/9550))
+  The parameter also accepts `"ident"` to keep using the object's current `Idents`.
 - Added `image.type` parameter to `Read10X_Image` enabling `VisiumV1` instances to be populated instead of instances of the default `VisiumV2` class ([#9556](https://github.com/satijalab/seurat/pull/9556))
 - Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter.
 - Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))

From 9c9a859b13c5a125fa6b9d329f281cbbfd5f2eae Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 12:53:22 -0500
Subject: [PATCH 153/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2cdac3190..bdf90336f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9017
+Version: 5.1.0.9018
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 57000eac5fe673c3372cf8ec525b6c888a908a35 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 07:37:23 -0500
Subject: [PATCH 154/166] Add master as trigger for Integration-Checks

---
 .github/workflows/integration_checks.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/integration_checks.yaml b/.github/workflows/integration_checks.yaml
index 0d725d8d0..a8e6e3f86 100644
--- a/.github/workflows/integration_checks.yaml
+++ b/.github/workflows/integration_checks.yaml
@@ -6,9 +6,11 @@ on:
   push:
     branches: 
     - develop
+    - master
   pull_request:
     branches: 
     - develop
+    - master
 
 jobs:
   check-package:

From 0dc30a29b2cd939539dcdff436b9bf045977bb71 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 10:10:31 -0500
Subject: [PATCH 155/166] Drop Travis CI badges from README

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 370cd40b7..56a7e94b1 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-[![Build Status](https://travis-ci.com/satijalab/seurat.svg?branch=master)](https://app.travis-ci.com:443/github/satijalab/seurat)
 [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/satijalab/seurat?branch=master&svg=true)](https://ci.appveyor.com/project/satijalab/seurat)
 [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat)
 [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat)

From ff9d9df519c41ef92a4a3805224f24c7b8c29656 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 00:31:51 -0500
Subject: [PATCH 156/166] Fixup MULTIseqDemux reference tag

---
 R/preprocessing.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index c4e46c4eb..d010eda9a 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -850,7 +850,7 @@ LoadCurioSeeker <- function(data.dir, assay = "Spatial") {
 #' @export
 #' @concept preprocessing
 #'
-#' @references \url{https://doi.org/10.1038/s41592-019-0433-8}
+#' @references \doi{10.1038/s41592-019-0433-8}
 #'
 #' @examples
 #' \dontrun{

From 541487d7d74e0a2a04c79fc34c6517aa617ab1f9 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 00:54:13 -0500
Subject: [PATCH 157/166] Update docs

---
 man/MULTIseqDemux.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/MULTIseqDemux.Rd b/man/MULTIseqDemux.Rd
index d7255d008..b1c88567f 100644
--- a/man/MULTIseqDemux.Rd
+++ b/man/MULTIseqDemux.Rd
@@ -42,6 +42,6 @@ object <- MULTIseqDemux(object)
 
 }
 \references{
-\url{https://doi.org/10.1038/s41592-019-0433-8}
+\doi{10.1038/s41592-019-0433-8}
 }
 \concept{preprocessing}

From d8d9aad942a0329bffa13333a88e5cea2aac38e0 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 01:40:42 -0500
Subject: [PATCH 158/166] Add cran.r-universe.dev as extra repo for enrichR

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bdf90336f..fa788872f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,7 @@ Authors@R: c(
 License: MIT + file LICENSE
 URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat
 BugReports: https://github.com/satijalab/seurat/issues
-Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev
+Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev, https://cran.r-universe.dev
 Depends:
     R (>= 4.0.0),
     methods,

From 9414eab1a5ffc485ec1ff654e294bf81764db565 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 13:27:20 -0500
Subject: [PATCH 159/166] Bump version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fa788872f..7c43840c4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9018
+Version: 5.1.0.9019
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From f8846dc775075f90470775d246eda2a8421ea215 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 01:40:54 -0500
Subject: [PATCH 160/166] Bump version to 5.2.0

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7c43840c4..bd3d2bc72 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: Seurat
-Version: 5.1.0.9019
+Version: 5.2.0
 Title: Tools for Single Cell Genomics
 Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
 Authors@R: c(

From 27476c86e3041b50cecc19b01360dbf988c15991 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 01:41:03 -0500
Subject: [PATCH 161/166] Finalize changelog

---
 NEWS.md | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index abfd8dbd8..b7d6e9359 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,27 +1,21 @@
-# Unreleased
+# Seurat 5.2.0 (2024-12-20)
 
 ## Changes
 - Added `group.by` parameter to `FindAllMarkers`, allowing users to regroup their data using a non-default identity class prior to performing differential expression ([#9550](https://github.com/satijalab/seurat/pull/9550))
-#' performing differential expression (see example); \code{"ident"} to use Idents
 - Added `image.type` parameter to `Read10X_Image` enabling `VisiumV1` instances to be populated instead of instances of the default `VisiumV2` class ([#9556](https://github.com/satijalab/seurat/pull/9556))
-- Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter.
+- Fixed `IntegrateLayers` to respect the `dims.to.integrate` parameter
 - Added `stroke.size` parameter to `DimPlot` ([#8180](https://github.com/satijalab/seurat/pull/8180))
 - Updated `RunLeiden` to use the `leidenbase` package instead of `leiden`; deprecated the `method` parameter for `RunLeiden` and `FindClusters`; updated `RunLeiden` to reset `random.seed` to 1 if the value is 0 or less ([#6792](https://github.com/satijalab/seurat/pull/6792))
-- Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
-- Surfaced more fine-grained control over what parts of a Xenium experiment are loaded in `LoadXenium`
-- Added ability to load Xenium nucleus segmentation masks
-- Updated `LoadXenium` to also read some run metadata (run start time, preservation method, panel used, organism, tissue type, instrument software version and stain kit used) into `misc` slot
-- Updated `ReadXenium` to load cell_feature_matrix.h5 when present in favor of the MEX format files
-- Added ability to read Xenium `segmentation_method` directly into `meta.data`
-- Updated `ReadXenium` to load .parquet files using `arrow` instead of .csv.gz files to support XOA 3.0
+- Updated `RunPCA` to use the `BPCells`-provided SVD solver on `BPCells` matrices; updated `JackStraw` to support `BPCells` matrices ([#8271](https://github.com/satijalab/seurat/pull/8271))
+- Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices ([#8966](https://github.com/satijalab/seurat/pull/8966))
 - Updated `RunSLSI` to support `BPCells` matrices
-- Fixed `LoadXenium` to accommodate datasets without "Blank Codeword" or "Unassigned Codeword" matrices
+- Updated `RunUMAP` to support `umap-learn` version >= 0.5.0 ([#9559](https://github.com/satijalab/seurat/pull/9559))
+- Updated `LoadXenium` and `ReadXenium` to accommodate the output from `XOA` v3.0; updated `LoadXenium` to provide more fine-grained control over the datatypes parsed in, including nucleus segmentation masks, segmentation methods, and other experimental metadata; updated `ReadXenium` to load cell_feature_matrix.h5 when present in favor of the MEX format files; updated `ReadXenium` to load .parquet files using `arrow` instead of .csv.gz files to support XOA 3.0 ([#8604](https://github.com/satijalab/seurat/pull/8605))
+- Fixed `LoadXenium` to accommodate datasets without "Blank Codeword" or "Unassigned Codeword" matrices ([#9135](https://github.com/satijalab/seurat/pull/9135))
 - Fixed `ReadXenium` to properly parse multiple molecular outputs at once ([#8265](https://github.com/satijalab/seurat/issues/8265))
-- Fixed `RunPCA` to avoid converting `BPCells` matrices into dense matrices - significantly reduces the function's memory usage when running on `BPCells` matrices
 - Added `features` parameter to `LeverageScore` and `SketchData`
 - Updated `SketchData`'s `ncells` parameter to accept integer vector
-- Updated `JackStraw` to support `BPCells` matrices
-- Updated `RunPCA` to use the `BPCells`-provided SVD solver on `BPCells` matrices
+
 
 # Seurat 5.1.0 (2024-05-08)
 

From bda91eb070269fd198523fc4312983c409d424c5 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 14:34:33 -0500
Subject: [PATCH 162/166] Update CRAN comments

---
 cran-comments.md | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/cran-comments.md b/cran-comments.md
index 09e951b1f..fed512508 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,31 +1,34 @@
-# Seurat v5.1.0
+# Seurat v5.2.0
 
 ## Test environments
-* local ubuntu 20.04 install, R 4.3.2
-* local macOS 14.1, R 4.4.0
+* local ubuntu 20.04 install, R 4.4.2
 * win-builder (oldrelease, release, devel)
+* mac-builder (devel)
+
+We were unable to test on r-release on mac-builder because the portal seemed to point to the wrong version.
 
 ## R CMD check results
 
 There were no ERRORs or WARNINGs
 
-There were two NOTEs
-
-> Suggests or Enhances not in mainstream repositories:
->   BPCells, presto
-> Availability using Additional_repositories specification:
->   BPCells   yes   https://bnprks.r-universe.dev   
->   presto    yes   https://satijalab.r-universe.dev
+There was one NOTE
 
-> * checking package dependencies ... NOTE
-> Package suggested but not available for checking: 'BPCells', 'presto'
+❯ checking CRAN incoming feasibility ... [12s/61s] NOTE
+  Maintainer: ‘Rahul Satija <seurat@nygenome.org>’
+  
+  Suggests or Enhances not in mainstream repositories:
+    BPCells, enrichR, presto
+  Availability using Additional_repositories specification:
+    BPCells   yes   https://bnprks.r-universe.dev   
+    enrichR   yes   https://cran.r-universe.dev     
+    presto    yes   https://satijalab.r-universe.dev
 
-BPCells and presto are hosted on R-universe and used conditionally in Seurat.
+BPCells, enrichR, and presto are hosted on R-universe and used conditionally in Seurat.
 
 ## Downstream dependencies
 
-There are three packages that depend on Seurat: CACIMAR, scCustomize, and SCdeconR; this update does not impact their functionality
+There are 3 packages that depend on Seurat: CACIMAR, scCustomize, and SCdeconR; this update does not impact their functionality.
 
-There are 30 packages that import Seurat: AnanseSeurat, APackOfTheClones, bbknnR, CAMML, DR.SC, DWLS, GeneNMF, ggsector, mixhvg, nebula, Platypus, PRECAST, ProFAST, rPanglaoDB, scAnnotate, scaper, sccca, scDiffCom, scGate, scGOclust, scMappR, scperturbR, scpoisson, SCRIP, scRNAstat, SignacX, SoupX, SPECK, STREAK, and tidyseurat; this update does not impact their functionality
+There are 34 packages that import Seurat: AnanseSeurat, APackOfTheClones, bbknnR, CAESAR.Suite, CAMML, DR.SC, DWLS, GeneNMF, ggsector, mixhvg, nebula, Platypus, PoweREST, PRECAST, ProFAST, rPanglaoDB, scAnnotate, scaper, sccca, scDiffCom, scGate, scGOclust, SCIntRuler, scMappR, scperturbR, scpoisson, SCRIP, scRNAstat, SignacX, SoupX, SpaCCI, SPECK, STREAK, and tidyseurat; this update does not impact their functionality.
 
-There are 22 packages that suggest Seurat: BisqueRNA, Canek, cellpypes, CIARA, ClustAssess, clustree, combiroc, conos, countland, CRMetrics, CytoSimplex, DIscBIO, dyngen, grandR, harmony, RESET, rliger, SCORPIUS, SCpubr, Signac, treefit, and VAM; this update does not impact their functionality
+There are 27 packages that suggest Seurat: BisqueRNA, Canek, cellpypes, CIARA, ClustAssess, clustree, combiroc, conos, countland, CRMetrics, CytoSimplex, DIscBIO, dyngen, easybio, grandR, harmony, laminr, mxfda, RESET, rliger, SCORPIUS, SCpubr, scregclust, Signac, SuperCell, treefit, and VAM; this update does not impact their functionality.

From c34662062e741f1be95795967c7f271a55bb5459 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 16:30:39 -0500
Subject: [PATCH 163/166] Fix bad roxygen2 \link{} targets

Fix bad link(s) in roxygen2 block for TopNeighbors
Fix bad link(s) in roxygen2 block for RunCCA
Fix bad link(s) in roxygen2 block for fortify.Centroids
Fix bad link(s) in roxygen2 block for SCTAssay
Fix bad link(s) in roxygen2 block for TransferData
Fix bad link(s) in roxygen2 block for RPCAIntegration
Fix bad link(s) in roxygen2 block for PrepSCTIntegration
Fix bad link(s) in roxygen2 block for IntegrateEmbeddings
Fix bad link(s) in roxygen2 block for IntegrateData
Fix bad link(s) in roxygen2 block for FindTransferAnchors
Fix bad link(s) in roxygen2 block for FindNeighbors generic
Fix bad link(s) in roxygen2 block for FindNeighbors.default
Fix bad link(s) in roxygen2 block for SCTModel
Fix bad link(s) in roxygen2 block for MapQueryData
Fix bad link(s) in roxygen2 block for PairwiseIntegrateReference
Fix bad link(s) in roxygen2 block for RunIntegration
---
 R/clustering.R    |  2 +-
 R/generics.R      | 16 ++++++++--------
 R/integration.R   | 34 +++++++++++++++++-----------------
 R/integration5.R  |  2 +-
 R/objects.R       |  8 ++++----
 R/visualization.R |  6 +++---
 6 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/R/clustering.R b/R/clustering.R
index 6af115589..8ecddefd4 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -508,7 +508,7 @@ FindClusters.Seurat <- function(
 #' distance matrix; note, for objects of class \code{dist}, this parameter will
 #' be set automatically
 #' @param k.param Defines k for the k-nearest neighbor algorithm
-#' @param return.neighbor Return result as \code{\link{Neighbor}} object. Not
+#' @param return.neighbor Return result as \code{\link[SeuratObject]{Neighbor}} object. Not
 #' used with distance matrix input.
 #' @param compute.SNN also compute the shared nearest neighbor graph
 #' @param prune.SNN Sets the cutoff for acceptable Jaccard index when
diff --git a/R/generics.R b/R/generics.R
index 2e64f7ce8..20239e852 100644
--- a/R/generics.R
+++ b/R/generics.R
@@ -156,13 +156,13 @@ FindMarkers <- function(object, ...) {
 #' @param object An object
 #' @param ... Arguments passed to other methods
 #'
-#' @return This function can either return a \code{\link{Neighbor}} object
-#' with the KNN information or a list of \code{\link{Graph}} objects with
+#' @return This function can either return a \code{\link[SeuratObject]{Neighbor}} object
+#' with the KNN information or a list of \code{\link[SeuratObject]{Graph}} objects with
 #' the KNN and SNN depending on the settings of \code{return.neighbor} and
-#' \code{compute.SNN}. When running on a \code{\link{Seurat}} object, this
-#' returns the \code{\link{Seurat}} object with the Graphs or Neighbor objects
+#' \code{compute.SNN}. When running on a \code{\link[SeuratObject]{Seurat}} object, this
+#' returns the \code{\link[SeuratObject]{Seurat}} object with the Graphs or Neighbor objects
 #' stored in their respective slots. Names of the Graph or Neighbor object can
-#' be found with \code{\link{Graphs}} or \code{\link{Neighbors}}.
+#' be found with \code{\link[SeuratObject]{Graphs}} or \code{\link[SeuratObject]{Neighbors}}.
 #'
 #' @examples
 #' data("pbmc_small")
@@ -277,7 +277,7 @@ GetAssay <- function(object, ...) {
 #' @param reductions Name of reductions to be integrated. For a
 #' TransferAnchorSet, this should be the name of a reduction present in the
 #' anchorset object (for example, "pcaproject"). For an IntegrationAnchorSet,
-#' this should be a \code{\link{DimReduc}} object containing all cells present
+#' this should be a \code{\link[SeuratObject]{DimReduc}} object containing all cells present
 #' in the anchorset object.
 #' @param dims.to.integrate Number of dimensions to return integrated values for
 #' @param weight.reduction Dimension reduction to use when calculating anchor
@@ -287,7 +287,7 @@ GetAssay <- function(object, ...) {
 #'    all objects to be integrated}
 #'    \item{A vector of strings, specifying the name of a dimension reduction to
 #'    use for each object to be integrated}
-#'    \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
+#'    \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, specifying the object to
 #'    use for each object in the integration}
 #'    \item{NULL, in which case the full corrected space is used for computing
 #'    anchor weights.}
@@ -470,7 +470,7 @@ PseudobulkExpression <- function(object, ...) {
 #'
 #' @return Returns a combined Seurat object with the CCA results stored.
 #'
-#' @seealso \code{\link{merge.Seurat}}
+#' @seealso \code{\link[SeuratObject]{merge.Seurat}}
 #'
 #' @examples
 #' \dontrun{
diff --git a/R/integration.R b/R/integration.R
index 8ef4d47d1..8efd4c636 100644
--- a/R/integration.R
+++ b/R/integration.R
@@ -634,8 +634,8 @@ ReciprocalProject <- function(
 #'   these scores to dampen outlier effects and rescale to range between 0-1.}
 #' }
 #'
-#' @param reference \code{\link{Seurat}} object to use as the reference
-#' @param query \code{\link{Seurat}} object to use as the query
+#' @param reference \code{\link[SeuratObject]{Seurat}} object to use as the reference
+#' @param query \code{\link[SeuratObject]{Seurat}} object to use as the query
 #' @param reference.assay Name of the Assay to use from reference
 #' @param reference.neighbors Name of the Neighbor to use from the reference.
 #' Optionally enables reuse of precomputed neighbors.
@@ -689,7 +689,7 @@ ReciprocalProject <- function(
 #' @param n.trees More trees gives higher precision when using annoy approximate
 #' nearest neighbor search
 #' @param eps Error bound on the neighbor finding algorithm (from
-#' \code{\link{RANN}} or \code{\link{RcppAnnoy}})
+#' \code{\link[RANN]{RANN}} or \code{\link[RcppAnnoy]{RcppAnnoy}})
 #' @param approx.pca Use truncated singular value decomposition to approximate
 #' PCA
 #' @param mapping.score.k Compute and store nearest k query neighbors in the
@@ -1334,8 +1334,8 @@ GetTransferPredictions <- function(object, assay = "predictions", slot = "data",
 #'    all objects to be integrated}
 #'    \item{A vector of strings, specifying the name of a dimension reduction to
 #'    use for each object to be integrated}
-#'    \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
-#'    use for each object in the integration}
+#'    \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, 
+#'    specifying the object to use for each object in the integration}
 #'    \item{NULL, in which case a new PCA will be calculated and used to
 #'    calculate anchor weights}
 #' }
@@ -1364,11 +1364,11 @@ GetTransferPredictions <- function(object, assay = "predictions", slot = "data",
 #' @param preserve.order Do not reorder objects based on size for each pairwise
 #' integration.
 #' @param eps Error bound on the neighbor finding algorithm (from
-#' \code{\link{RANN}})
+#' \code{\link[RANN]{RANN}})
 #' @param verbose Print progress bars and output
 #'
-#' @return Returns a \code{\link{Seurat}} object with a new integrated
-#' \code{\link{Assay}}. If \code{normalization.method = "LogNormalize"}, the
+#' @return Returns a \code{\link[SeuratObject]{Seurat}} object with a new integrated
+#' \code{\link[SeuratObject]{Assay}}. If \code{normalization.method = "LogNormalize"}, the
 #' integrated data is returned to the \code{data} slot and can be treated as
 #' log-normalized, corrected data. If \code{normalization.method = "SCT"}, the
 #' integrated data is returned to the \code{scale.data} slot and can be treated
@@ -2756,10 +2756,10 @@ MixingMetric <- function(
 #'   anchor.features for efficiency in downstream processing. }
 #' }
 #'
-#' @param object.list A list of \code{\link{Seurat}} objects to prepare for integration
-#' @param assay The name of the \code{\link{Assay}} to use for integration. This can be a
+#' @param object.list A list of \code{\link[SeuratObject]{Seurat}} objects to prepare for integration
+#' @param assay The name of the \code{\link[SeuratObject]{Assay}} to use for integration. This can be a
 #' single name if all the assays to be integrated have the same name, or a character vector
-#' containing the name of each \code{\link{Assay}} in each object to be integrated. The
+#' containing the name of each \code{\link[SeuratObject]{Assay}} in each object to be integrated. The
 #' specified assays must have been normalized using \code{\link{SCTransform}}.
 #' If NULL (default), the current default assay for each object is used.
 #' @param anchor.features Can be either:
@@ -2773,7 +2773,7 @@ MixingMetric <- function(
 #' the Pearson residual will be clipped to
 #' @param verbose Display output/messages
 #'
-#' @return A list of \code{\link{Seurat}} objects with the appropriate \code{scale.data} slots
+#' @return A list of \code{\link[SeuratObject]{Seurat}} objects with the appropriate \code{scale.data} slots
 #' containing only the required \code{anchor.features}.
 #'
 #' @importFrom pbapply pblapply
@@ -3219,7 +3219,7 @@ SelectSCTIntegrationFeatures <- function(
 #'    \item{lsiproject: Use the projected LSI used for anchor building}
 #'    \item{pca: Use an internal PCA on the query only}
 #'    \item{cca: Use the CCA used for anchor building}
-#'    \item{custom DimReduc: User provided \code{\link{DimReduc}} object
+#'    \item{custom DimReduc: User provided \code{\link[SeuratObject]{DimReduc}} object
 #'    computed on the query cells}
 #' }
 #' @param l2.norm Perform L2 normalization on the cell embeddings after
@@ -3230,7 +3230,7 @@ SelectSCTIntegrationFeatures <- function(
 #' @param k.weight Number of neighbors to consider when weighting anchors
 #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting
 #' @param eps Error bound on the neighbor finding algorithm (from
-#' \code{\link{RANN}})
+#' \code{\link[RANN]{RANN}})
 #' @param n.trees More trees gives higher precision when using annoy approximate
 #' nearest neighbor search
 #' @param verbose Print progress bars and output
@@ -4599,7 +4599,7 @@ GetCellOffsets <- function(anchors, dataset, cell, cellnames.list, cellnames) {
 # query, and weights will need to be calculated for all cells in the object.
 # @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting
 # @param preserve.order Do not reorder objects based on size for each pairwise integration.
-# @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}})
+# @param eps Error bound on the neighbor finding algorithm (from \code{\link[RANN]{RANN}})
 # @param verbose Print progress bars and output
 #
 # @return Returns an integrated matrix
@@ -4742,7 +4742,7 @@ NNtoMatrix <- function(idx, distance, k) {
 # @param preserve.order Do not reorder objects based on size for each pairwise
 # integration.
 # @param eps Error bound on the neighbor finding algorithm (from
-# \code{\link{RANN}})
+# \code{\link[RANN]{RANN}})
 # @param verbose Print progress bars and output
 #
 # @return Returns a Seurat object with a new integrated Assay
@@ -5497,7 +5497,7 @@ ReferenceRange <- function(x, lower = 0.025, upper = 0.975) {
 # query, and weights will need to be calculated for all cells in the object.
 # @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting
 # @param sample.tree Specify the order of integration. If NULL, will compute automatically.
-# @param eps Error bound on the neighbor finding algorithm (from \code{\link{RANN}})
+# @param eps Error bound on the neighbor finding algorithm (from \code{\link[RANN]{RANN}})
 # @param verbose Print progress bars and output
 #
 RunIntegration <- function(
diff --git a/R/integration5.R b/R/integration5.R
index c0b7034d7..62d6d937e 100644
--- a/R/integration5.R
+++ b/R/integration5.R
@@ -288,7 +288,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration'
 #' @param object A \code{Seurat} object
 #' @param assay Name of \code{Assay} in the \code{Seurat} object
 #' @param layers Names of layers in \code{assay}
-#' @param orig A \link[SeuratObject:DimReduc]{dimensional reduction} to correct
+#' @param orig A \link[SeuratObject]{DimReduc} to correct
 #' @param new.reduction Name of new integrated dimensional reduction
 #' @param reference A reference \code{Seurat} object
 #' @param features A vector of features to use for integration
diff --git a/R/objects.R b/R/objects.R
index 86b9ae482..50addbbfe 100644
--- a/R/objects.R
+++ b/R/objects.R
@@ -188,7 +188,7 @@ IntegrationData <- setClass(
 #' @slot arguments other information used in SCTransform
 #' @slot median_umi Median UMI (or scale factor) used to calculate corrected counts
 #'
-#' @seealso \code{\link{Assay}}
+#' @seealso \code{\link[SeuratObject]{Assay}}
 #'
 #' @name SCTAssay-class
 #' @rdname SCTAssay-class
@@ -215,12 +215,12 @@ SCTModel <- setClass(
 
 #' The SCTAssay Class
 #'
-#' The SCTAssay object contains all the information found in an \code{\link{Assay}}
+#' The SCTAssay object contains all the information found in an \code{\link[SeuratObject]{Assay}}
 #' object, with extra information from the results of \code{\link{SCTransform}}
 #'
 #' @slot SCTModel.list A list containing SCT models
 #'
-#' @seealso \code{\link{Assay}}
+#' @seealso \code{\link[SeuratObject]{Assay}}
 #'
 #' @name SCTAssay-class
 #' @rdname SCTAssay-class
@@ -904,7 +904,7 @@ TopCells <- function(object, dim = 1, ncells = 20, balanced = FALSE, ...) {
 #'
 #' Return a vector of cell names of the nearest n cells.
 #'
-#' @param object \code{\link{Neighbor}} object
+#' @param object \code{\link[SeuratObject]{Neighbor}} object
 #' @param cell Cell of interest
 #' @param n Number of neighbors to return
 #'
diff --git a/R/visualization.R b/R/visualization.R
index 64e5c9a52..65ef5c94a 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -6320,9 +6320,9 @@ WhiteBackground <- function(...) {
 #' Prepare Coordinates for Spatial Plots
 #'
 #' @inheritParams SeuratObject::GetTissueCoordinates
-#' @param model A \code{\linkS4class{Segmentation}},
-#' \code{\linkS4class{Centroids}},
-#' or \code{\linkS4class{Molecules}} object
+#' @param model A \code{\link[SeuratObject:Segmentation-class]{Segmentation}},
+#' \code{\link[SeuratObject:Centroids-class]{Centroids}},
+#' or \code{\link[SeuratObject:Molecules-class]{Molecules}} object
 #' @param data Extra data to be used for annotating the cell segmentations; the
 #' easiest way to pass data is a one-column
 #' \code{\link[base:data.frame]{data frame}} with the values to color by and

From 8e04665158ccb3ab54ada59d606a093812ecf135 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 17:02:37 -0500
Subject: [PATCH 164/166] Fix bad roxygen2 reference for RunSPCA

---
 R/generics.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/generics.R b/R/generics.R
index 20239e852..4ef6f41c9 100644
--- a/R/generics.R
+++ b/R/generics.R
@@ -600,7 +600,7 @@ RunSLSI <- function(object, ...) {
 #' @references Barshan E, Ghodsi A, Azimifar Z, Jahromi MZ.
 #' Supervised principal component analysis: Visualization, classification and
 #' regression on subspaces and submanifolds.
-#' Pattern Recognition. 2011 Jul 1;44(7):1357-71. \url{https://www.sciencedirect.com/science/article/pii/S0031320310005819?casa_token=AZMFg5OtPnAAAAAA:_Udu7GJ7G2ed1-XSmr-3IGSISUwcHfMpNtCj-qacXH5SBC4nwzVid36GXI3r8XG8dK5WOQui};
+#' Pattern Recognition. 2011 Jul 1;44(7):1357-71. \doi{10.1016/j.patcog.2010.12.015};
 #' @export
 #'
 #' @rdname RunSPCA

From c69156840239fff1635daf0a04cdd7555a577320 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Fri, 20 Dec 2024 16:58:13 -0500
Subject: [PATCH 165/166] Update docs

---
 man/CCAIntegration.Rd      |  4 ++--
 man/FindNeighbors.Rd       | 12 ++++++------
 man/FindTransferAnchors.Rd |  6 +++---
 man/IntegrateData.Rd       | 10 +++++-----
 man/IntegrateEmbeddings.Rd |  4 ++--
 man/JointPCAIntegration.Rd |  4 ++--
 man/PrepSCTIntegration.Rd  |  8 ++++----
 man/RPCAIntegration.Rd     |  4 ++--
 man/RunCCA.Rd              |  2 +-
 man/RunSPCA.Rd             |  2 +-
 man/SCTAssay-class.Rd      |  6 +++---
 man/TopNeighbors.Rd        |  2 +-
 man/TransferData.Rd        |  4 ++--
 man/fortify-Spatial.Rd     |  6 +++---
 14 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd
index b8399ad04..f4f1e5b0a 100644
--- a/man/CCAIntegration.Rd
+++ b/man/CCAIntegration.Rd
@@ -33,7 +33,7 @@ CCAIntegration(
 
 \item{layers}{Names of layers in \code{assay}}
 
-\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct}
+\item{orig}{A \link[SeuratObject]{DimReduc} to correct}
 
 \item{new.reduction}{Name of new integrated dimensional reduction}
 
@@ -61,7 +61,7 @@ weights. This can be one of:
    all objects to be integrated}
    \item{A vector of strings, specifying the name of a dimension reduction to
    use for each object to be integrated}
-   \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
+   \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, specifying the object to
    use for each object in the integration}
    \item{NULL, in which case the full corrected space is used for computing
    anchor weights.}
diff --git a/man/FindNeighbors.Rd b/man/FindNeighbors.Rd
index 0c4b8c703..de80c0ab5 100644
--- a/man/FindNeighbors.Rd
+++ b/man/FindNeighbors.Rd
@@ -98,7 +98,7 @@ be set automatically}
 
 \item{k.param}{Defines k for the k-nearest neighbor algorithm}
 
-\item{return.neighbor}{Return result as \code{\link{Neighbor}} object. Not
+\item{return.neighbor}{Return result as \code{\link[SeuratObject]{Neighbor}} object. Not
 used with distance matrix input.}
 
 \item{compute.SNN}{also compute the shared nearest neighbor graph}
@@ -152,13 +152,13 @@ neighbor (NN) graph, and the second element used to store the SNN graph. If
 only one name is supplied, only the NN graph is stored.}
 }
 \value{
-This function can either return a \code{\link{Neighbor}} object
-with the KNN information or a list of \code{\link{Graph}} objects with
+This function can either return a \code{\link[SeuratObject]{Neighbor}} object
+with the KNN information or a list of \code{\link[SeuratObject]{Graph}} objects with
 the KNN and SNN depending on the settings of \code{return.neighbor} and
-\code{compute.SNN}. When running on a \code{\link{Seurat}} object, this
-returns the \code{\link{Seurat}} object with the Graphs or Neighbor objects
+\code{compute.SNN}. When running on a \code{\link[SeuratObject]{Seurat}} object, this
+returns the \code{\link[SeuratObject]{Seurat}} object with the Graphs or Neighbor objects
 stored in their respective slots. Names of the Graph or Neighbor object can
-be found with \code{\link{Graphs}} or \code{\link{Neighbors}}.
+be found with \code{\link[SeuratObject]{Graphs}} or \code{\link[SeuratObject]{Neighbors}}.
 }
 \description{
 Computes the \code{k.param} nearest neighbors for a given dataset. Can also
diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd
index f0dfbbc60..4ea81c058 100644
--- a/man/FindTransferAnchors.Rd
+++ b/man/FindTransferAnchors.Rd
@@ -33,9 +33,9 @@ FindTransferAnchors(
 )
 }
 \arguments{
-\item{reference}{\code{\link{Seurat}} object to use as the reference}
+\item{reference}{\code{\link[SeuratObject]{Seurat}} object to use as the reference}
 
-\item{query}{\code{\link{Seurat}} object to use as the query}
+\item{query}{\code{\link[SeuratObject]{Seurat}} object to use as the query}
 
 \item{normalization.method}{Name of normalization method used: LogNormalize
 or SCT.}
@@ -109,7 +109,7 @@ annoy}
 nearest neighbor search}
 
 \item{eps}{Error bound on the neighbor finding algorithm (from
-\code{\link{RANN}} or \code{\link{RcppAnnoy}})}
+\code{\link[RANN]{RANN}} or \code{\link[RcppAnnoy]{RcppAnnoy}})}
 
 \item{approx.pca}{Use truncated singular value decomposition to approximate
 PCA}
diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd
index e08bd682e..399fdc3fc 100644
--- a/man/IntegrateData.Rd
+++ b/man/IntegrateData.Rd
@@ -47,8 +47,8 @@ weights. This can be one of:
 all objects to be integrated}
 \item{A vector of strings, specifying the name of a dimension reduction to
 use for each object to be integrated}
-\item{A vector of \code{\link{DimReduc}} objects, specifying the object to
-use for each object in the integration}
+\item{A vector of \code{\link[SeuratObject]{DimReduc}} objects,
+specifying the object to use for each object in the integration}
 \item{NULL, in which case a new PCA will be calculated and used to
 calculate anchor weights}
 }
@@ -80,13 +80,13 @@ If NULL, the sample tree will be computed automatically.}
 integration.}
 
 \item{eps}{Error bound on the neighbor finding algorithm (from
-\code{\link{RANN}})}
+\code{\link[RANN]{RANN}})}
 
 \item{verbose}{Print progress bars and output}
 }
 \value{
-Returns a \code{\link{Seurat}} object with a new integrated
-\code{\link{Assay}}. If \code{normalization.method = "LogNormalize"}, the
+Returns a \code{\link[SeuratObject]{Seurat}} object with a new integrated
+\code{\link[SeuratObject]{Assay}}. If \code{normalization.method = "LogNormalize"}, the
 integrated data is returned to the \code{data} slot and can be treated as
 log-normalized, corrected data. If \code{normalization.method = "SCT"}, the
 integrated data is returned to the \code{scale.data} slot and can be treated
diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd
index 304d0500a..61a185f92 100644
--- a/man/IntegrateEmbeddings.Rd
+++ b/man/IntegrateEmbeddings.Rd
@@ -49,7 +49,7 @@ IntegrateEmbeddings(anchorset, ...)
 \item{reductions}{Name of reductions to be integrated. For a
 TransferAnchorSet, this should be the name of a reduction present in the
 anchorset object (for example, "pcaproject"). For an IntegrationAnchorSet,
-this should be a \code{\link{DimReduc}} object containing all cells present
+this should be a \code{\link[SeuratObject]{DimReduc}} object containing all cells present
 in the anchorset object.}
 
 \item{dims.to.integrate}{Number of dimensions to return integrated values for}
@@ -63,7 +63,7 @@ weights. This can be one of:
    all objects to be integrated}
    \item{A vector of strings, specifying the name of a dimension reduction to
    use for each object to be integrated}
-   \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
+   \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, specifying the object to
    use for each object in the integration}
    \item{NULL, in which case the full corrected space is used for computing
    anchor weights.}
diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd
index 3f2ab73f7..a390c1f23 100644
--- a/man/JointPCAIntegration.Rd
+++ b/man/JointPCAIntegration.Rd
@@ -33,7 +33,7 @@ JointPCAIntegration(
 
 \item{layers}{Names of layers in \code{assay}}
 
-\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct}
+\item{orig}{A \link[SeuratObject]{DimReduc} to correct}
 
 \item{new.reduction}{Name of new integrated dimensional reduction}
 
@@ -61,7 +61,7 @@ weights. This can be one of:
    all objects to be integrated}
    \item{A vector of strings, specifying the name of a dimension reduction to
    use for each object to be integrated}
-   \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
+   \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, specifying the object to
    use for each object in the integration}
    \item{NULL, in which case the full corrected space is used for computing
    anchor weights.}
diff --git a/man/PrepSCTIntegration.Rd b/man/PrepSCTIntegration.Rd
index d3fdecae4..8052e10e2 100644
--- a/man/PrepSCTIntegration.Rd
+++ b/man/PrepSCTIntegration.Rd
@@ -13,11 +13,11 @@ PrepSCTIntegration(
 )
 }
 \arguments{
-\item{object.list}{A list of \code{\link{Seurat}} objects to prepare for integration}
+\item{object.list}{A list of \code{\link[SeuratObject]{Seurat}} objects to prepare for integration}
 
-\item{assay}{The name of the \code{\link{Assay}} to use for integration. This can be a
+\item{assay}{The name of the \code{\link[SeuratObject]{Assay}} to use for integration. This can be a
 single name if all the assays to be integrated have the same name, or a character vector
-containing the name of each \code{\link{Assay}} in each object to be integrated. The
+containing the name of each \code{\link[SeuratObject]{Assay}} in each object to be integrated. The
 specified assays must have been normalized using \code{\link{SCTransform}}.
 If NULL (default), the current default assay for each object is used.}
 
@@ -35,7 +35,7 @@ the Pearson residual will be clipped to}
 \item{verbose}{Display output/messages}
 }
 \value{
-A list of \code{\link{Seurat}} objects with the appropriate \code{scale.data} slots
+A list of \code{\link[SeuratObject]{Seurat}} objects with the appropriate \code{scale.data} slots
 containing only the required \code{anchor.features}.
 }
 \description{
diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd
index 0e4589d68..3c93933c2 100644
--- a/man/RPCAIntegration.Rd
+++ b/man/RPCAIntegration.Rd
@@ -33,7 +33,7 @@ RPCAIntegration(
 
 \item{layers}{Names of layers in \code{assay}}
 
-\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct}
+\item{orig}{A \link[SeuratObject]{DimReduc} to correct}
 
 \item{new.reduction}{Name of new integrated dimensional reduction}
 
@@ -61,7 +61,7 @@ weights. This can be one of:
    all objects to be integrated}
    \item{A vector of strings, specifying the name of a dimension reduction to
    use for each object to be integrated}
-   \item{A vector of \code{\link{DimReduc}} objects, specifying the object to
+   \item{A vector of \code{\link[SeuratObject]{DimReduc}} objects, specifying the object to
    use for each object in the integration}
    \item{NULL, in which case the full corrected space is used for computing
    anchor weights.}
diff --git a/man/RunCCA.Rd b/man/RunCCA.Rd
index aa2c6b14b..71091ba23 100644
--- a/man/RunCCA.Rd
+++ b/man/RunCCA.Rd
@@ -91,6 +91,6 @@ print(x = pbmc_cca[["cca"]])
 
 }
 \seealso{
-\code{\link{merge.Seurat}}
+\code{\link[SeuratObject]{merge.Seurat}}
 }
 \concept{dimensional_reduction}
diff --git a/man/RunSPCA.Rd b/man/RunSPCA.Rd
index 11c23986e..87fb7efa3 100644
--- a/man/RunSPCA.Rd
+++ b/man/RunSPCA.Rd
@@ -99,6 +99,6 @@ matrix factorization.
 Barshan E, Ghodsi A, Azimifar Z, Jahromi MZ.
 Supervised principal component analysis: Visualization, classification and
 regression on subspaces and submanifolds.
-Pattern Recognition. 2011 Jul 1;44(7):1357-71. \url{https://www.sciencedirect.com/science/article/pii/S0031320310005819?casa_token=AZMFg5OtPnAAAAAA:_Udu7GJ7G2ed1-XSmr-3IGSISUwcHfMpNtCj-qacXH5SBC4nwzVid36GXI3r8XG8dK5WOQui};
+Pattern Recognition. 2011 Jul 1;44(7):1357-71. \doi{10.1016/j.patcog.2010.12.015};
 }
 \concept{dimensional_reduction}
diff --git a/man/SCTAssay-class.Rd b/man/SCTAssay-class.Rd
index d116a62e9..5feba288a 100644
--- a/man/SCTAssay-class.Rd
+++ b/man/SCTAssay-class.Rd
@@ -27,7 +27,7 @@
 The SCTModel object is a model and parameters storage from SCTransform.
 It can be used to calculate Pearson residuals for new genes.
 
-The SCTAssay object contains all the information found in an \code{\link{Assay}}
+The SCTAssay object contains all the information found in an \code{\link[SeuratObject]{Assay}}
 object, with extra information from the results of \code{\link{SCTransform}}
 }
 \section{Slots}{
@@ -93,8 +93,8 @@ levels(pbmc_small[['SCT']])
 
 }
 \seealso{
-\code{\link{Assay}}
+\code{\link[SeuratObject]{Assay}}
 
-\code{\link{Assay}}
+\code{\link[SeuratObject]{Assay}}
 }
 \concept{objects}
diff --git a/man/TopNeighbors.Rd b/man/TopNeighbors.Rd
index 64dc5cc67..1c029803a 100644
--- a/man/TopNeighbors.Rd
+++ b/man/TopNeighbors.Rd
@@ -7,7 +7,7 @@
 TopNeighbors(object, cell, n = 5)
 }
 \arguments{
-\item{object}{\code{\link{Neighbor}} object}
+\item{object}{\code{\link[SeuratObject]{Neighbor}} object}
 
 \item{cell}{Cell of interest}
 
diff --git a/man/TransferData.Rd b/man/TransferData.Rd
index e30977df7..4df588c51 100644
--- a/man/TransferData.Rd
+++ b/man/TransferData.Rd
@@ -53,7 +53,7 @@ anchors. Options are:
    \item{lsiproject: Use the projected LSI used for anchor building}
    \item{pca: Use an internal PCA on the query only}
    \item{cca: Use the CCA used for anchor building}
-   \item{custom DimReduc: User provided \code{\link{DimReduc}} object
+   \item{custom DimReduc: User provided \code{\link[SeuratObject]{DimReduc}} object
    computed on the query cells}
 }}
 
@@ -69,7 +69,7 @@ weighting.}
 \item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting}
 
 \item{eps}{Error bound on the neighbor finding algorithm (from
-\code{\link{RANN}})}
+\code{\link[RANN]{RANN}})}
 
 \item{n.trees}{More trees gives higher precision when using annoy approximate
 nearest neighbor search}
diff --git a/man/fortify-Spatial.Rd b/man/fortify-Spatial.Rd
index e2d13bcf8..b19fea1b6 100644
--- a/man/fortify-Spatial.Rd
+++ b/man/fortify-Spatial.Rd
@@ -15,9 +15,9 @@
 \method{fortify}{Segmentation}(model, data, ...)
 }
 \arguments{
-\item{model}{A \code{\linkS4class{Segmentation}},
-\code{\linkS4class{Centroids}},
-or \code{\linkS4class{Molecules}} object}
+\item{model}{A \code{\link[SeuratObject:Segmentation-class]{Segmentation}},
+\code{\link[SeuratObject:Centroids-class]{Centroids}},
+or \code{\link[SeuratObject:Molecules-class]{Molecules}} object}
 
 \item{data}{Extra data to be used for annotating the cell segmentations; the
 easiest way to pass data is a one-column

From c93a1b74d965c9ed5d8f976bfc98f04948c4d676 Mon Sep 17 00:00:00 2001
From: David Collins <dcollins@nygenome.org>
Date: Tue, 7 Jan 2025 16:43:19 -0500
Subject: [PATCH 166/166] Drop cran.r-universe.dev from Additional_repositories

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bd3d2bc72..25fda37bb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,7 @@ Authors@R: c(
 License: MIT + file LICENSE
 URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat
 BugReports: https://github.com/satijalab/seurat/issues
-Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev, https://cran.r-universe.dev
+Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev
 Depends:
     R (>= 4.0.0),
     methods,