-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_Alevin_inDrops.sh
59 lines (52 loc) · 2.83 KB
/
run_Alevin_inDrops.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
# The line above selects the interpreter. It deliberately carries no trailing
# comment: Linux hands everything after the interpreter path to it as a single
# argument, so bash would try to open that text as the script whenever this
# file is executed directly instead of being submitted through the scheduler.
# Any line that starts with #$ is an instruction to SGE
#$ -S /bin/bash #-- the shell for the job
#$ -o alevin_log #-- output directory
#$ -e alevin_err #-- error directory
#$ -cwd #-- tell the job that it should start in your working directory
#$ -r y #-- tell the system that if a job crashes, it should be restarted
#$ -j y #-- tell the system that the STDERR and STDOUT should be joined
#$ -l mem_free=2G #-- submits on nodes with enough free memory (required)
#$ -l scratch=50G #-- SGE resources (home and scratch disks)
#$ -l h_rt=2:00:00 #-- runtime limit (hr:min:sec)
# Use this bash script for aligning an inDrops V2020 library to a Salmon index using Alevin
#
# Salmon-Alevin requires all metadata sequences (cell barcodes and UMI) to be contained within a single FASTQ file
# inDrops libraries, however, collect barcode information from two FASTQ files (R2 and R4). FASTQ files for
# R2 and R4 must therefore be merged prior to running this script, using "convert_inDropsFASTQVX_to_Alevin.sh"
#
# REQUIRED INPUTS:
# 1. A FASTQ file containing biological reads, e.g. DEW101.R1.fastq.gz
# 2. A FASTQ file containing metadata reads (cell barcodes + UMI), e.g. DEW101.R2R4.fastq.gz
# 3. A prebuilt Salmon index
# 4. A transcript-to-gene mapping file
#
# USAGE:
# Complete the user specified inputs below.
# This script can process a single library or a list of libraries.
# For each basename appearing in "list_of_fastq_basenames", this script expects two FASTQ files named as:
# (1) basename.R1.fastq.gz
# (2) basename.R2R4.fastq.gz
#
# User-specified inputs:
# Basenames of the libraries to process; one "salmon alevin" run is launched
# per entry, in the order listed.
list_of_fastq_basenames=(DEW042 DEW043 DEW044 DEW045)
# Directory holding <basename>.R1.fastq.gz and <basename>.R2R4.fastq.gz.
path_to_fastq_files='/wynton/home/wagner/dwagner/fastq/Wagner2018/'
# Prebuilt Salmon index (passed to -i).
path_to_salmon_index='/wynton/home/wagner/dwagner/references/salmon_index_grcz11_combined_FP'
# Transcript-to-gene mapping file (passed to --tgMap).
path_to_t2g_map='/wynton/home/wagner/dwagner/references/salmon_index_grcz11_combined_FP/txp2gene.tsv'
# Parent directory for results; each library is written to <output_path>/<basename>.
output_path='/wynton/home/wagner/dwagner/projects/DEW/220715.Wagner2018/'
# Geometry strings handed to --bc-geometry, --umi-geometry and --read-geometry.
# Presumably "<file number>[<first>-<last>]", where file 1 is the merged R2R4
# FASTQ given to -1 and file 2 is the R1 FASTQ given to -2 -- confirm against
# the Salmon documentation. The values must not contain any whitespace.
bc_geometry='1[1-16]'
umi_geometry='1[17-22]'
read_geometry='2[1-end]'
# Value passed to --forceCells.
forceCells='20000'
# Value passed to -p (thread count).
# NOTE(review): the SGE header visible in this file requests no parallel
# environment (-pe), so 32 threads may exceed what the job is granted -- confirm.
numThreads='32'
# CODE TO EXECUTE ALEVIN (don't edit this part)
# Record when and where the job ran.
date
hostname

# Load the environment modules that provide the salmon executable.
module load CBI
module load salmon

# The geometry strings are quoted when passed to salmon (they contain [...],
# which an unquoted expansion would treat as a filename pattern). Quoting also
# disables the word splitting that used to discard stray whitespace, so strip
# any trailing whitespace from them explicitly first.
for geometry_var in bc_geometry umi_geometry read_geometry; do
  geometry_value=${!geometry_var}
  printf -v "$geometry_var" '%s' \
    "${geometry_value%"${geometry_value##*[![:space:]]}"}"
done

# Drop one trailing slash so joined paths do not contain "//".
fastq_dir=${path_to_fastq_files%/}
results_dir=${output_path%/}

# Run alevin once per library. A failed library is reported on stderr and the
# loop continues with the next one; the script exits non-zero if any failed.
alevin_status=0
for bname in "${list_of_fastq_basenames[@]}"; do
  echo "Mapping $bname"
  salmon alevin -lISR \
    -i "$path_to_salmon_index" \
    --tgMap "$path_to_t2g_map" \
    -1 "${fastq_dir}/${bname}.R2R4.fastq.gz" \
    -2 "${fastq_dir}/${bname}.R1.fastq.gz" \
    -o "${results_dir}/${bname}" \
    --read-geometry "$read_geometry" \
    --bc-geometry "$bc_geometry" \
    --umi-geometry "$umi_geometry" \
    -p "$numThreads" \
    --forceCells "$forceCells" \
    --dumpFeatures --dumpMtx ||
    {
      printf 'salmon alevin failed for %s\n' "$bname" >&2
      alevin_status=1
    }
done
exit "$alevin_status"