-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #407 from lilab-bcb/yiming
Fix workflow issue with cellranger-arc
- Loading branch information
Showing 7 changed files with 76 additions and 7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Image bundling Cell Ranger ARC 2.0.2 with the Google Cloud CLI, AWS CLI v2
# and stratocumulus, on top of Debian bullseye (slim).
FROM debian:bullseye-slim
SHELL ["/bin/bash", "-c"]

# System packages plus the pinned Google Cloud CLI from Google's apt repository.
# apt caches and package lists are removed in this same RUN: deleting them in a
# later layer would not shrink the image, because the files would still be
# stored in this layer.
RUN apt-get update && \
    apt-get install --no-install-recommends -y unzip rsync build-essential dpkg-dev curl gnupg procps python3 python3-pip && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
    apt-get update -y && apt-get install -y google-cloud-cli=392.0.0-0 && \
    apt-get -qq -y autoremove && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/log/dpkg.log

# AWS CLI v2 (pinned). The installer copies itself under /usr/local/aws-cli, so
# both the archive and the unpacked ./aws directory can be removed afterwards.
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.7.11.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws

# Pinned Python dependencies; --no-cache-dir keeps pip's download cache out of
# the image layer.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir pandas==1.4.3 && \
    pip3 install --no-cache-dir packaging==21.3 && \
    pip3 install --no-cache-dir stratocumulus==0.2.4

# Tools live under /software. The trailing slash on the ADD destinations makes
# it explicit that /software is a directory: the URL download keeps its file
# name, and the local tarball is unpacked inside the directory.
RUN mkdir /software
ADD https://raw.githubusercontent.com/lilab-bcb/cumulus/master/docker/monitor_script.sh /software/
ADD cellranger-arc-2.0.2.tar.gz /software/

# Make `python` resolve to python3 and make the monitor script executable.
RUN rm -f /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && \
    chmod a+rx /software/monitor_script.sh

ENV PATH=/software:/software/cellranger-arc-2.0.2:/usr/local/aws-cli/v2/current/bin:$PATH
ENV TMPDIR=/tmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import pandas as pd | ||
import pegasusio as io | ||
|
||
from pegasusio import MultimodalData, UnimodalData | ||
from typing import Union, Optional | ||
|
||
|
||
def filter_included_genes(
    data: Union[MultimodalData, UnimodalData],
    filt_features_tsv: str,
    output_file: Optional[str] = None,
    expected_n_genes: Optional[int] = 18082,
) -> None:
    """Keep only the genes listed in a TSV file, then fix known gene names.

    The first column of ``filt_features_tsv`` is read as the list of feature
    IDs to keep. ``data`` is subset through ``data._inplace_subset_var`` using
    a boolean mask over ``data.var['featureid']``, ``correct_gene_names`` is
    applied, and the result is optionally written out.

    Args:
        data: Object exposing ``var`` (with a ``featureid`` column) and
            ``_inplace_subset_var``; it is modified by this call.
        filt_features_tsv: Path to a tab-separated, header-less file (or
            anything else ``pandas.read_csv`` accepts) whose first column
            holds the feature IDs to keep.
        output_file: If truthy, ``data`` is passed to ``io.write_output`` with
            this destination after filtering.
        expected_n_genes: Number of distinct feature IDs the TSV must contain.
            Defaults to 18082, the value the check was originally hard-coded
            to (presumably specific to one reference gene list -- confirm
            before reusing with other inputs). Pass ``None`` to skip the check.

    Raises:
        AssertionError: If the number of distinct IDs in the TSV differs from
            ``expected_n_genes``.
    """
    df_var = pd.read_csv(filt_features_tsv, sep='\t', header=None)
    gene_ids = df_var[0]

    if expected_n_genes is not None and gene_ids.nunique() != expected_n_genes:
        # Raised explicitly instead of using an ``assert`` statement so the
        # check still runs under ``python -O``; the exception type and message
        # are kept so existing callers see the same failure.
        raise AssertionError("Filtered genes inconsistent!")

    data._inplace_subset_var(data.var['featureid'].isin(gene_ids.values))
    correct_gene_names(data)
    if output_file:
        io.write_output(data, output_file)
|
||
def correct_gene_names(data):
    """Replace the ``featurekey`` of a fixed set of Ensembl gene IDs.

    ``data.var`` must be a DataFrame indexed by ``featurekey`` that has a
    ``featureid`` column. Rows whose ``featureid`` appears in the table below
    get the listed name as their new ``featurekey``; all other rows are left
    untouched. ``data.var`` is reassigned to the updated frame, indexed by
    ``featurekey`` again.

    Args:
        data: Object with a ``var`` DataFrame attribute; modified in place.
    """
    rename_dict = {
        "ENSG00000285053": "GGPS1-TBCE",
        "ENSG00000284770": "TBCE",
        "ENSG00000187522": "HSPA14",
        "ENSG00000284024": "MSANTD7",
        "ENSG00000269226": "TMSB15C",
        "ENSG00000158427": "TMSB15B",
    }
    # Move the ``featurekey`` index into a regular column so it can be edited.
    df_var = data.var.reset_index()

    # One vectorised lookup instead of one full-column scan per gene ID.
    needs_fix = df_var['featureid'].isin(list(rename_dict))
    if needs_fix.any():
        df_var.loc[needs_fix, 'featurekey'] = (
            df_var.loc[needs_fix, 'featureid'].map(rename_dict)
        )

    data.var = df_var.set_index('featurekey')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters