From 3fd911a277d5e78c84c9f3f713a19de928bc9ba2 Mon Sep 17 00:00:00 2001 From: Marcel Levstek <62072754+marcellevstek@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:55:48 +0100 Subject: [PATCH] fixup! Make changes to ``gtf-to-bed`` process --- .../support_processors/gtf_to_bed.py | 23 +++++++++---------- .../processes/test_support_processors.py | 2 ++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/resolwe_bio/processes/support_processors/gtf_to_bed.py b/resolwe_bio/processes/support_processors/gtf_to_bed.py index 1b57081e1..b6d084173 100644 --- a/resolwe_bio/processes/support_processors/gtf_to_bed.py +++ b/resolwe_bio/processes/support_processors/gtf_to_bed.py @@ -15,7 +15,7 @@ class GTFtoBED(Process): - """GTF to BED conversion. + """GTF to BED conversion for predefined genes and feature types. Note that this process only works with ENSEMBL annotations. """ @@ -100,7 +100,7 @@ class Input: "geneset", label="Gene set", description="Gene set to use for filtering.", - required=False, + required=True, ) canonical_transcripts = DataField( @@ -181,17 +181,16 @@ def run(self, inputs, outputs): gtf = gtf[gtf["source"].isin(inputs.annotation_source)] gtf = gtf[gtf["feature_type"] == feature_type] - if inputs.geneset: - if inputs.annotation.output.species != inputs.geneset.output.species: - self.error( - "Gene set data object species does not match the annotation species." - ) - geneset = pd.read_csv( - inputs.geneset.output.geneset.path, - delimiter="\t", - names=["ID"], + if inputs.annotation.output.species != inputs.geneset.output.species: + self.error( + "Species of the gene set data object does not match the species of the annotation data object." ) - gtf = gtf[gtf["gene_id"].isin(geneset["ID"])] + geneset = pd.read_csv( + inputs.geneset.output.geneset.path, + delimiter="\t", + names=["ID"], + ) + gtf = gtf[gtf["gene_id"].isin(geneset["ID"])] if inputs.canonical_transcripts and not feature_type == "gene": if ( diff --git a/resolwe_bio/tests/processes/test_support_processors.py b/resolwe_bio/tests/processes/test_support_processors.py index 09783c676..2e7a64d30 100644 --- a/resolwe_bio/tests/processes/test_support_processors.py +++ b/resolwe_bio/tests/processes/test_support_processors.py @@ -1789,6 +1789,7 @@ def test_gtf_to_bed(self): "gtf-to-bed", { "annotation": gtf.id, + "geneset": geneset.id, "annotation_field": "gene_name", }, ) @@ -1798,6 +1799,7 @@ def test_gtf_to_bed(self): "gtf-to-bed", { "annotation": gtf.id, + "geneset": geneset.id, "annotation_field": "gene_id_feature_id", "feature_type": "exon", },