diff --git a/browser/help/topics/v4-browser-hts.md b/browser/help/topics/v4-browser-hts.md index 50b5ac7ab..3f8ea3924 100644 --- a/browser/help/topics/v4-browser-hts.md +++ b/browser/help/topics/v4-browser-hts.md @@ -51,10 +51,6 @@ Row fields: - `hemizygote_count`: Number of hemizygous alternate individuals in this genetic ancestry group. - `homozygote_count`: Number of homozygous alternate individuals in this genetic ancestry group. - `non_ukb`: Struct containing variant frequency information from the non-UKB subset. Includes same fields as above struct (`all`). - - `faf95`: Struct containing information about the filtered allele frequency (FAF; 95% confidence interval [CI]). - - `grpmax`: Genetic ancestry group maximum (grpmax) FAF. - - `grpmax_gen_anc`: Genetic ancestry group associated with grpmax FAF. - - `faf99`: Struct containing information about the FAF (99% CI). Contains same subfields as above (`faf95`). - `fafmax`: Struct containing information about the maximum FAF. - `gnomad`: Struct containing information about the fafmax for all of gnomad for the exome data. - `faf95_max`: Max FAF value for the (95% CI). 
diff --git a/data-pipeline/src/data_pipeline/datasets/gnomad_v4/gnomad_v4_variants.py b/data-pipeline/src/data_pipeline/datasets/gnomad_v4/gnomad_v4_variants.py
index 0f8bd2534..c2691e80b 100644
--- a/data-pipeline/src/data_pipeline/datasets/gnomad_v4/gnomad_v4_variants.py
+++ b/data-pipeline/src/data_pipeline/datasets/gnomad_v4/gnomad_v4_variants.py
@@ -391,6 +391,20 @@ def freq_joint(ds, subset=None, pop=None, sex=None, raw=False):
     return ds
 
 
+def prepare_table_for_release(variants_table_path: str) -> hl.Table:
+    """Prepare the variants table for release.
+
+    Reads the Hail table at ``variants_table_path``, removes the ``faf95`` and
+    ``faf99`` fields from the ``exomes`` and ``genomes`` structs, and replaces
+    the table globals with a single ``mane_select_version`` field taken from
+    the ``mane_transcripts_version`` global.
+    """
+    ds = hl.read_table(variants_table_path)
+    ds = ds.annotate(exomes=ds.exomes.drop("faf95", "faf99"), genomes=ds.genomes.drop("faf95", "faf99"))
+    ds = ds.select_globals(mane_select_version=ds.globals.mane_transcripts_version)
+    return ds
+
+
 def prepare_gnomad_v4_variants(exome_variants_path: str, genome_variants_path: str, variants_joint_frequency_path: str):
     exome_variants = prepare_gnomad_v4_variants_helper(exome_variants_path, "exome")
     genome_variants = prepare_gnomad_v4_variants_helper(genome_variants_path, "genome")
diff --git a/data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py b/data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py
index 4b484e44d..91fb05888 100644
--- a/data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py
+++ b/data-pipeline/src/data_pipeline/pipelines/gnomad_v4_variants.py
@@ -8,6 +8,7 @@
 
 from data_pipeline.datasets.gnomad_v4.gnomad_v4_variants import (
     prepare_gnomad_v4_variants,
+    prepare_table_for_release,
 )
 
 
@@ -102,6 +103,15 @@
     },
 )
 
+pipeline.add_task(
+    name="prepare_table_for_release",
+    task_function=prepare_table_for_release,
+    output_path=f"{output_sub_dir}/gnomad_v4_variants_a4_for_release.ht",
+    inputs={
+        "variants_table_path": pipeline.get_task("annotate_vrs_ids"),
+    },
+)
+
 ###############################################
 # Outputs
 ###############################################