Skip to content

Commit

Permalink
fixup: clean up exome and genome structs on release table
Browse files Browse the repository at this point in the history
  • Loading branch information
rileyhgrant committed Aug 12, 2024
1 parent 4368c8d commit 7ed499e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
4 changes: 0 additions & 4 deletions browser/help/topics/v4-browser-hts.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ Row fields:
- `hemizygote_count`: Number of hemizygous alternate individuals in this genetic ancestry group.
- `homozygote_count`: Number of homozygous alternate individuals in this genetic ancestry group.
- `non_ukb`: Struct containing variant frequency information from the non-UKB subset. Includes same fields as above struct (`all`).
- `faf95`: Struct containing information about the filtered allele frequency (FAF; 95% confidence interval [CI]).
- `grpmax`: Genetic ancestry group maximum (grpmax) FAF.
- `grpmax_gen_anc`: Genetic ancestry group associated with grpmax FAF.
- `faf99`: Struct containing information about the FAF (99% CI). Contains same subfields as above (`faf95`).
- `fafmax`: Struct containing information about the maximum FAF.
- `gnomad`: Struct containing information about the fafmax for all of gnomAD for the exome data.
- `faf95_max`: Maximum FAF value (95% CI).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,13 @@ def freq_joint(ds, subset=None, pop=None, sex=None, raw=False):
return ds


def prepare_table_for_release(variants_table_path):
    """Build the release version of the variants table.

    Reads the table at ``variants_table_path`` with ``hl.read_table``, removes
    the ``faf95`` and ``faf99`` fields from both the ``exomes`` and ``genomes``
    structs, and selects a single global, ``mane_select_version``, taken from
    the table's ``mane_transcripts_version`` global.

    Args:
        variants_table_path: Path passed straight to ``hl.read_table``.

    Returns:
        The modified table.
    """
    # Fields stripped from each per-data-type struct before release.
    dropped_faf_fields = ("faf95", "faf99")

    table = hl.read_table(variants_table_path)
    table = table.annotate(
        exomes=table.exomes.drop(*dropped_faf_fields),
        genomes=table.genomes.drop(*dropped_faf_fields),
    )
    return table.select_globals(mane_select_version=table.globals.mane_transcripts_version)


def prepare_gnomad_v4_variants(exome_variants_path: str, genome_variants_path: str, variants_joint_frequency_path: str):
exome_variants = prepare_gnomad_v4_variants_helper(exome_variants_path, "exome")
genome_variants = prepare_gnomad_v4_variants_helper(genome_variants_path, "genome")
Expand Down
10 changes: 10 additions & 0 deletions data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from data_pipeline.datasets.gnomad_v4.gnomad_v4_variants import (
prepare_gnomad_v4_variants,
prepare_table_for_release,
)


Expand Down Expand Up @@ -102,6 +103,15 @@
},
)

# Register the release-preparation task: it runs prepare_table_for_release with the
# "annotate_vrs_ids" task supplied as its variants_table_path input, and targets
# gnomad_v4_variants_a4_for_release.ht under output_sub_dir.
# NOTE(review): presumably the pipeline resolves the task reference to that task's
# output path before calling the function — confirm against the Pipeline implementation.
pipeline.add_task(
name="prepare_table_for_release",
task_function=prepare_table_for_release,
output_path=f"{output_sub_dir}/gnomad_v4_variants_a4_for_release.ht",
inputs={
"variants_table_path": pipeline.get_task("annotate_vrs_ids"),
},
)

###############################################
# Outputs
###############################################
Expand Down

0 comments on commit 7ed499e

Please sign in to comment.