Skip to content

Commit

Permalink
Spladder parallel (#20)
Browse files Browse the repository at this point in the history
* Add concurrency to the spladder call and allow ScanNeo2 to finish the workflow after a faulty spladder run (thanks to @nttg8100)
  • Loading branch information
riasc authored May 20, 2024
1 parent 5ab1364 commit 0ad049c
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 23 deletions.
30 changes: 16 additions & 14 deletions workflow/rules/altsplicing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,23 @@ rule spladder:
log:
"logs/{sample}/spladder/{group}_build.log"
params:
confidence=f"""--confidence {config["altsplicing"]["confidence"]}""",
iteration=f"""--iterations {config["altsplicing"]["iterations"]}""",
edgelimit=f"""--ase-edge-limit {config["altsplicing"]["edgelimit"]}"""
confidence=f"""{config["altsplicing"]["confidence"]}""",
iteration=f"""{config["altsplicing"]["iterations"]}""",
edgelimit=f"""{config["altsplicing"]["edgelimit"]}"""
threads: config['threads']
shell:
"""
spladder build -b {input.bam} \
-a resources/refs/genome.gtf \
-o {output} --filter-overlap-exons \
--no-primary-only --quantify-graph \
{params.confidence} \
{params.iteration} \
{params.edgelimit} \
--qmode all > {log} 2>&1
"""

# run_spladder.sh redirects spladder's stdout/stderr into the log file it
# receives as its last argument, so no shell redirection is added here.
"""
bash workflow/scripts/run_spladder.sh \
{input.bam} \
{threads} \
resources/refs/genome.gtf \
{output} \
{params.confidence} \
{params.iteration} \
{params.edgelimit} \
{log}
"""

rule splicing_to_vcf:
input:
"results/{sample}/rnaseq/altsplicing/spladder/{group}"
Expand Down
27 changes: 18 additions & 9 deletions workflow/scripts/altsplc2vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def main():
for root, dirs, files in os.walk(options.input):
for file in files:
if file.endswith(".txt.gz"):
print(file)
# determine type of splicing from file
pattern = re.compile(r'merge_graphs_(\w+)_C[1-3].confirmed.txt.gz')
match = pattern.search(file)
Expand All @@ -80,24 +81,31 @@ def main():
next(fh)
for line in fh:
column = line.decode().rstrip().split('\t')
print(column)

if (event_type == "alt_5prime" or
event_type == "alt_3prime" or
event_type == "exon_skip" or
event_type == "intron_retention" or
event_type == "mutex_exon"):

chrom = column[0]
strand = column[1]
event_id = column[2]

# https://github.com/ratschlab/spladder/issues/168
annotated = column[3]
try:
chrom = column[0]
strand = column[1]
event_id = column[2]

gene_name = column[4]
# https://github.com/ratschlab/spladder/issues/168
annotated = column[3]

start = int(column[7])
end = int(column[8])
gene_name = column[4]

start = int(column[7])
end = int(column[8])

# in some cases lines are incomplete (faulty spladder output)
except IndexError:
print("IndexError - skipping event")
continue

if (event_type == "alt_5prime" or
event_type == "alt_3prime" or
Expand Down Expand Up @@ -237,6 +245,7 @@ def main():
calls = [vcfpy.Call(sample=options.group, data={'GT': '0/1'})]
)
writer.write_record(rec)
writer.close()



Expand Down
31 changes: 31 additions & 0 deletions workflow/scripts/run_spladder.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
#
# Wrapper around `spladder build`.
#
# Usage:
#   run_spladder.sh <bam> <threads> <annotation> <outdir> \
#                   <confidence> <iterations> <edgelimit> <logfile>
#
# spladder's stdout and stderr are written to <logfile>. A failing spladder
# run does not abort this script: the exit status is recorded in the log and
# an empty <outdir> is created if spladder produced none, so that the calling
# workflow can continue.

if [ "$#" -ne 8 ]; then
    echo "Usage: $0 <bam> <threads> <annotation> <outdir>" \
         "<confidence> <iterations> <edgelimit> <logfile>" >&2
    exit 1
fi

input=$1
threads=$2
anno=$3
output=$4
confidence=$5
iterations=$6
edgelimit=$7
log=$8

# run spladder (all expansions quoted so paths containing spaces or glob
# characters are passed through as single arguments)
spladder build \
    -b "$input" \
    --parallel "$threads" \
    -a "$anno" \
    -o "$output" \
    --filter-overlap-exons \
    --no-primary-only \
    --quantify-graph \
    --confidence "$confidence" \
    --iterations "$iterations" \
    --ase-edge-limit "$edgelimit" \
    --qmode all > "$log" 2>&1
status=$?

# keep going on failure, but leave a trace of it in the log
if [ "$status" -ne 0 ]; then
    echo "spladder build exited with status $status" >> "$log"
fi

# check if spladder generated output (if not create folder)
if [ ! -d "$output" ]; then
    echo "Spladder did not generate output. Creating empty folder."
    mkdir -p "$output"
fi

0 comments on commit 0ad049c

Please sign in to comment.