Skip to content

Commit

Permalink
feat: re-enable Freebayes wrapper to generate data with artifacts (#466)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Nov 14, 2023
1 parent 4874074 commit c2bec07
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Pipfile as created by VS Code.
/Pipfile*

# Mac
.*DS_Store

Expand Down
21 changes: 21 additions & 0 deletions snappy_pipeline/workflows/variant_calling/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,27 @@ rule variant_calling_bcftools_call_run:
wf.wrapper_path("bcftools_call")


# Run FreeBayes ---------------------------------------------------------------


rule variant_calling_freebayes_run:
input:
unpack(wf.get_input_files("freebayes", "run")),
output:
**wf.get_output_files("freebayes", "run"),
threads: wf.get_resource("freebayes", "run", "threads")
resources:
time=wf.get_resource("freebayes", "run", "time"),
memory=wf.get_resource("freebayes", "run", "memory"),
partition=wf.get_resource("freebayes", "run", "partition"),
params:
**wf.get_params("freebayes", "run")(),
log:
**wf.get_log_file("freebayes", "run"),
wrapper:
wf.wrapper_path("freebayes")


# Run GATK HaplotypeCaller (direct, pedigree/multi-sample) --------------------


Expand Down
50 changes: 50 additions & 0 deletions snappy_pipeline/workflows/variant_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@
Disabled by default.
``bcftools_call``
Variant calling with Freebayes.
This caller is provided as it is useful to genereate variant calls with the same artifacts
as external pipelines.
Disabled by default.
=======
Reports
=======
Expand Down Expand Up @@ -283,6 +292,7 @@
#: Available germline variant callers
VARIANT_CALLERS = (
"bcftools_call",
"freebayes",
"gatk3_hc",
"gatk3_ug",
"gatk4_hc_gvcf",
Expand Down Expand Up @@ -331,6 +341,14 @@
max_indel_depth: 250
window_length: 10000000
num_threads: 16
freebayes:
use_standard_filters: true
window_length: 10000000
num_threads: 16
min_alternate_fraction: 0.05 # FreeBayes default
min_mapping_quality: 1 # FreeBayes default
min_repeat_entropy: 1 # FreeBayes default
haplotype_length: 3 # FreeBayes default
gatk3_hc:
num_threads: 16
window_length: 10000000
Expand Down Expand Up @@ -532,6 +550,38 @@ def get_resource_usage(self, action: str) -> ResourceUsage:
)


class FreebayesStepPart(VariantCallingStepPart):
"""Germline variant calling with freebayes"""

#: Step name
name = "freebayes"

def get_resource_usage(self, action):
self._validate_action(action)
return ResourceUsage(
threads=16,
time="2-00:00:00", # 2 days
memory=f"{int(3.75 * 1024 * 16)}M",
)

def get_params(self, action):
"""
:param action: Action (i.e., step) in the workflow. Currently only available for 'run'.
:type action: str
:return: Returns get parameters function.
:raises UnsupportedActionException: if action not 'run'.
"""
self._validate_action(action)
parameters_key_list = [
"window_length",
"min_alternate_fraction",
"min_mapping_quality",
"min_repeat_entropy",
"haplotype_length",
]
return {key: self.config["freebayes"][key] for key in parameters_key_list}


class GatkCallerStepPartBase(VariantCallingStepPart):
"""Base class for GATK v3/v4 variant callers"""

Expand Down
10 changes: 10 additions & 0 deletions snappy_wrappers/wrappers/freebayes/environmenty.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
channels:
- conda-forge
- bioconda

dependencies:
- python
- bcftools ==1.18
- bedtools
- freebayes ==1.3.7
- htslib ==1.18
100 changes: 100 additions & 0 deletions snappy_wrappers/wrappers/freebayes/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from snakemake import shell

__author__ = "Manuel Holtgrewe <[email protected]>"

shell.executable("/bin/bash")

# Build standard filters argument
arg_std_filters = ""
if snakemake.config["step_config"]["variant_calling"]["freebayes"]["use_standard_filters"]:
arg_std_filters = "--standard-filters"

# Build ignore regions argument
ignore_chroms = snakemake.config["step_config"]["variant_calling"]["freebayes"].get(
"ignore_chroms", ""
)
if ignore_chroms:
arg_ignore_chroms = "--ignore-chroms " + " ".join(map(repr, ignore_chroms))
else:
arg_ignore_chroms = ""

# Get number of threads parameter
num_threads = snakemake.config["step_config"]["variant_calling"]["freebayes"]["num_threads"]

# Define which freebayes mode to use
freebayes = "freebayes"
if num_threads > 1:
freebayes_parallel_tpl = (
"freebayes-parallel <(fasta_generate_regions.py {ref} {window_length}) {threads} "
)
freebayes = freebayes_parallel_tpl.format(
ref=snakemake.config["static_data_config"]["reference"]["path"],
window_length=snakemake.params.window_length,
threads=num_threads,
)

shell(
r"""
set -x
# Write out information about conda installation.
conda list > {snakemake.log.conda_list}
conda info > {snakemake.log.conda_info}
md5sum {snakemake.log.conda_list} > {snakemake.log.conda_list_md5}
md5sum {snakemake.log.conda_info} > {snakemake.log.conda_info_md5}
# Also pipe stderr to log file
if [[ -n "{snakemake.log.log}" ]]; then
if [[ "$(set +e; tty; set -e)" != "" ]]; then
rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
exec 2> >(tee -a "{snakemake.log.log}" >&2)
else
rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
echo "No tty,logging disabled" >"{snakemake.log.log}"
fi
fi
# Export library
export LD_LIBRARY_PATH=$(dirname $(which bgzip))/../lib
# Hack: get back bin directory of base/root environment
export PATH=$PATH:$(dirname $(dirname $(which conda)))/bin
export TMPDIR=$(mktemp -d)
trap "rm -rf $TMPDIR" EXIT
# Create shortcut to reference
export REF={snakemake.config[static_data_config][reference][path]}
# Call Freebayes
{freebayes} \
--fasta-reference $REF \
--genotype-qualities \
--min-repeat-entropy {snakemake.params.min_repeat_entropy} \
--haplotype-length {snakemake.params.haplotype_length} \
--min-alternate-fraction {snakemake.params.min_alternate_fraction} \
--min-mapping-quality {snakemake.params.min_mapping_quality} \
$(echo {snakemake.input} | tr ' ' '\n' | grep '\.bam$') \
| bcftools norm --fasta-ref $REF \
| snappy-vcf_sort $REF.fai \
> $TMPDIR/tmp.vcf
# Filter VCF if requested
if [[ -n "{arg_ignore_chroms}" ]]; then
# Create include regions bed file
snappy-genome_windows --fai-file $REF.fai {arg_ignore_chroms} \
| sed "s/,//g" | sed "s/[:-]/\t/g" \
> $TMPDIR/tmp.bed
# Intersect VCF file
bedtools intersect -a $TMPDIR/tmp.vcf -b $TMPDIR/tmp.bed -wa -header \
| bgzip -c \
> {snakemake.output.vcf}
else
bgzip -c $TMPDIR/tmp.vcf > {snakemake.output.vcf}
fi
# compute tabix index of the resulting VCF file
tabix -f {snakemake.output.vcf}
pushd $(dirname {snakemake.output.vcf}) && \
md5sum $(basename {snakemake.output.vcf}) > $(basename {snakemake.output.vcf}).md5 && \
md5sum $(basename {snakemake.output.tbi}) > $(basename {snakemake.output.tbi}).md5
"""
)

# Compute MD5 sums of logs.
shell(
r"""
md5sum {snakemake.log.log} > {snakemake.log.log_md5}
"""
)

0 comments on commit c2bec07

Please sign in to comment.