Merge pull request #33 from COMBINE-lab/develop

Develop
COMBINE-lab · Feb 23, 2023 · 8502dee · 8502dee
2 parents 68a7c56 + 3ee04c4
commit 8502dee
Show file tree

Hide file tree

Showing 7 changed files with 230 additions and 64 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "esbonio.sphinx.confDir": ""
+}
diff --git a/docs/.vscode/settings.json b/docs/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "esbonio.sphinx.confDir": ""
+}
diff --git a/docs/source/building_splici_index.rst b/docs/source/building_splici_index.rst
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -22,7 +22,7 @@
 author = "Dongze He, Rob Patro"
 
 # The full version, including alpha/beta/rc tags
-release = "0.6.0"
+release = "0.8.1"
 
 master_doc = "index"
 

diff --git a/docs/source/installing.rst b/docs/source/installing.rst
@@ -7,20 +7,11 @@ The ``pyroe`` package can be accessed from its `github repository <https://githu
 
   pip install pyroe
 
-
-To make use of the ``load_fry`` function (which, itself, installs `scanpy <https://scanpy.readthedocs.io/en/stable/>`_):
-
-.. code:: bash
-
-  pip install pyroe
-
-
 Alternatively, ``pyroe`` can be installed via ``bioconda``, which will automatically install the variant of the package including ``load_fry``, and will
-also install ``bedtools`` to enable faster construction of the *splici* reference.  This installation can be performed with the command:
+also install ``bedtools`` to enable faster construction of the expanded references.  This installation can be performed with the following shell command:
 
 .. code:: bash
 
-  conda install pyroe
+  conda install pyroe -c bioconda
 
 
-with the appropriate bioconda channel in the conda channel list.
diff --git a/docs/source/processing_fry_quants.rst b/docs/source/processing_fry_quants.rst
@@ -12,24 +12,24 @@ Required Parameters
 ^^^^^^^^^^^^^^^^^^^
 
 frydir : ``str``
-    The path to a output directory returned by alevin-fry quant command. \\
+    The path to an output directory returned by the alevin-fry quant command. \
     The directory containing the alevin-fry quantification (i.e. the quant.json file & alevin subdirectory).
 
 
 Optional Parameters
 ^^^^^^^^^^^^^^^^^^^
 
 output_format : ``str`` or ``dict``
-    A string represents one of the pre-defined output formats, which are "scRNA", "snRNA" and "velocity". \\
-    If a customized format of the returned `AnnData` is needed, one can pass a Dictionary.\\
+    A string representing one of the pre-defined output formats, which are "scRNA", "snRNA" and "velocity". \
+    If a customized format of the returned `AnnData` is needed, one can pass a Dictionary. \
     See Notes section for details.
 
 quiet : ``bool`` (default: ``True``)
-    True if function should be quiet.
+    True if function should be quiet.\
     False if messages (including error messages) should be printed out. 
 
 nonzero : ``bool`` (default: ``False``)
-    True if cells with non-zero expression value across all genes should be filtered in each layer.
+    True if cells with non-zero expression value across all genes should be filtered in each layer.\
     False if unexpressed genes should be kept.
 
 `load_fry` Notes
@@ -45,20 +45,22 @@ returned by alevin-fry.
 The following formats are defined:
 
 * "scRNA": \
-    This format is recommended for single cell RNA-sequencing experiments. 
-    It returns a `X` field that contains the S+A count of each gene in each cell without any extra layers.
+    This format is recommended for single cell RNA-sequencing experiments. \
+    It returns an `X` field that contains the S+A count of each gene in each cell, and an extra `unspliced` field that contains the U count of each gene in each cell.
 
-* "snRNA": \
-    This format is recommended for single nucleus RNA-sequencing experiments. 
-    It returns a `X` field that contains the U+S+A count of each gene in each cell without any extra layers.
+* "snRNA", "U+S+A", "all": \
+    These formats are recommended for single nucleus RNA-sequencing experiments. Furthermore, these formats match the behaviors of Cell Ranger 7, which by default includes all intronic reads in the output gene count matrix for both single-cell and single-nucleus experiments.\
+    These formats return an `X` field that contains the U+S+A count of each gene in each cell without any extra layers.
 
 * "raw": \
-    This format uses the S count matrix as the `X` field and put the U, S, and A counts into three 
-    separate layers, which are "unspliced", "spliced" and "ambiguous".
+    This format uses the S count matrix as the `X` field and puts the U, S, and A counts into three separate layers, which are `unspliced`, `spliced` and `ambiguous`.
+
+* "S+A": \
+    This format uses the S+A counts as the `X` field without any extra layers.
 
 * "velocity": \
-    This format is the same as "scRNA", except it contains two extra layers: the "spliced" layer, 
-    which contains the S+A counts, and the "unspliced" layer, which contains the U counts.
+    This format is the same as "scRNA", except it contains a `spliced` layer, 
+    which contains the S+A counts.
 
 A custom output format can be defined using a Dictionary specifying the desired format of the output ``Anndata`` object.  
 If the input is not a USA mode quantification directory, this parameter is ignored

diff --git a/src/pyroe/make_txome.py b/src/pyroe/make_txome.py
@@ -1,4 +1,5 @@
 import os
+import warnings
 import subprocess
 import shutil
 import pyranges as pr
@@ -254,7 +255,7 @@ def check_gr(gr, output_dir):
         gr = gr.insert(gene_df)
 
         # Then, records all exon records and gene records
-        clean_gr = pr.concat(clean_gr, gr[gr.Feature == "exon"])
+        clean_gr = pr.concat([clean_gr, gr[gr.Feature == "exon"]])
 
     # check if the transcripts and genes are well defined
     # first, we get the transcript annotation from exons and from the transcript feature records
@@ -633,8 +634,10 @@ def make_splici_txome(
     # get introns
     # the introns() function uses inplace=True argument from pandas,
     # which will trigger a FutureWarning.
-    # warnings.simplefilter(action="ignore", category=FutureWarning)
+    warnings.simplefilter(action="ignore", category=FutureWarning)
     introns = gr.features.introns(by="transcript")
+    warnings.simplefilter(action="default", category=FutureWarning)
+
     introns.Name = introns.gene_id
 
     if no_flanking_merge:
@@ -677,7 +680,7 @@ def make_splici_txome(
             "".join(
                 [
                     " Failed to refine intron bounds using genome bounds.",
-                    " Please check if the input genome FASTA file and GTF file match each other.",
+                    " Please check if the input genome FASTA file and GTF file match each other, especially the chromosome names.",
                     f" The error message was: {str(err)}",
                 ]
             ),