diff --git a/gnomad_db/pkgdata/.ipynb_checkpoints/gnomad_columns-checkpoint.yaml b/gnomad_db/pkgdata/.ipynb_checkpoints/gnomad_columns-checkpoint.yaml deleted file mode 100644 index c1008e5..0000000 --- a/gnomad_db/pkgdata/.ipynb_checkpoints/gnomad_columns-checkpoint.yaml +++ /dev/null @@ -1,61 +0,0 @@ -base_columns: - - CHROM - - POS - - REF - - ALT - - FILTER -v2: - - AC # Alternate allele count for samples - - AN # Total number of alleles in samples - - AF # Alternate allele frequency in samples - - rf_tp_probability # Random forest prediction probability for a site being a true variant - - MQ # Root mean square of the mapping quality of reads across all samples - - QD # Variant call confidence normalized by depth of sample reads supporting a variant - - ReadPosRankSum # Z-score from Wilcoxon rank sum test of alternate vs. reference read position bias - - DP # Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered - - VQSLOD # Log-odds ratio of being a true variant versus being a false positive under the trained VQSR Gaussian mixture model - - AC_popmax # Allele count in the population with the maximum AF - - AN_popmax # Total number of alleles in the population with the maximum AF - - AF_popmax # Maximum allele frequency across populations (excluding samples of Ashkenazi - - AF_eas # Alternate allele frequency in samples of East Asian ancestry - - AF_oth # Alternate allele frequency in XY samples of Other ancestry - - AF_nfe # Alternate allele frequency in XY samples of Non-Finnish European ancestry - - AF_fin # Alternate allele frequency in XX samples of Finnish ancestry - - AF_afr # Alternate allele frequency in samples of African/African-American ancestry - - AF_asj # Alternate allele frequency in samples of Ashkenazi Jewish ancestry -v3: - - AC # Alternate allele count for samples - - AN # Total number of alleles in samples - - AF # Alternate allele frequency in samples - - InbreedingCoeff # Inbreeding coefficient as 
estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation - - MQ # Root mean square of the mapping quality of reads across all samples - - QD # Variant call confidence normalized by depth of sample reads supporting a variant - - ReadPosRankSum # Z-score from Wilcoxon rank sum test of alternate vs. reference read position bias - - VarDP - - AS_VQSLOD - - AC_popmax # Allele count in the population with the maximum AF - - AN_popmax # Total number of alleles in the population with the maximum AF - - AF_popmax # Maximum allele frequency across populations (excluding samples of Ashkenazi - - AF_eas # Alternate allele frequency in samples of East Asian ancestry - - AF_nfe # Alternate allele frequency in XY samples of Non-Finnish European ancestry - - AF_fin # Alternate allele frequency in XX samples of Finnish ancestry - - AF_afr # Alternate allele frequency in samples of African/African-American ancestry - - AF_asj # Alternate allele frequency in samples of Ashkenazi Jewish ancestry - -v4: - - AC # Alternate allele count for samples - - AN # Total number of alleles in samples - - AF # Alternate allele frequency in samples - - MQ # Root mean square of the mapping quality of reads across all samples - - QD # Variant call confidence normalized by depth of sample reads supporting a variant - - ReadPosRankSum # Z-score from Wilcoxon rank sum test of alternate vs. 
reference read position bias - - VarDP - - AS_VQSLOD - - AC_grpmax # Allele count in the population with the maximum AF - - AN_grpmax # Total number of alleles in the population with the maximum AF - - AF_grpmax # Maximum allele frequency across populations (excluding samples of Ashkenazi - - AF_eas # Alternate allele frequency in samples of East Asian ancestry - - AF_nfe # Alternate allele frequency in XY samples of Non-Finnish European ancestry - - AF_fin # Alternate allele frequency in XX samples of Finnish ancestry - - AF_afr # Alternate allele frequency in samples of African/African-American ancestry - - AF_asj # Alternate allele frequency in samples of Ashkenazi Jewish ancestry \ No newline at end of file diff --git a/scripts/GettingStartedwithGnomAD_DB.ipynb b/scripts/GettingStartedwithGnomAD_DB.ipynb index 2b78c97..a58f8c9 100644 --- a/scripts/GettingStartedwithGnomAD_DB.ipynb +++ b/scripts/GettingStartedwithGnomAD_DB.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09f4d260", + "id": "7303ebd0", "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "3fc686b3", + "id": "8d5a63f4", "metadata": {}, "source": [ "# Download SQLite preprocessed files\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "94f31d05", + "id": "f8529267", "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "markdown", - "id": "6c140086", + "id": "6bd9a9da", "metadata": {}, "source": [ "# Initialize Database" @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4acdaab3", + "id": "c148a8df", "metadata": { "tags": [ "parameters" @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8c7b431e", + "id": "057a03cf", "metadata": {}, "outputs": [], "source": [ @@ -73,7 +73,7 @@ }, { "cell_type": "markdown", - "id": "34cea3dc", + "id": "6b664ad0", "metadata": {}, "source": [ "# Insert gnomAD variants into the database from 
single tsv file\n", @@ -83,7 +83,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dc60de98", + "id": "aa628e88", "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3dcd83a", + "id": "2e0f8963", "metadata": {}, "outputs": [], "source": [ @@ -108,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "a837251b", + "id": "f7ee891c", "metadata": {}, "source": [ "# Query MAF" @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21de3d07", + "id": "b196b5e6", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +128,7 @@ { "cell_type": "code", "execution_count": null, - "id": "277d0201", + "id": "b3d2d014", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ }, { "cell_type": "markdown", - "id": "7eb48461", + "id": "76b08258", "metadata": {}, "source": [ "## You can pass a dataframe with variants\n", @@ -148,7 +148,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c62a4754", + "id": "00010e64", "metadata": {}, "outputs": [], "source": [ @@ -158,7 +158,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c179fedc", + "id": "da779a49", "metadata": {}, "outputs": [], "source": [ @@ -168,7 +168,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44969569", + "id": "ac19d6eb", "metadata": {}, "outputs": [], "source": [ @@ -178,7 +178,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1a37c4e5", + "id": "bffce318", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +193,7 @@ { "cell_type": "code", "execution_count": null, - "id": "537d8d7e", + "id": "a4108e52", "metadata": {}, "outputs": [], "source": [ @@ -202,7 +202,7 @@ }, { "cell_type": "markdown", - "id": "7bbe5975", + "id": "c4818aec", "metadata": {}, "source": [ "## You can pass a single string as a variant" @@ -211,7 +211,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14a15cf6", + "id": "2e788375", "metadata": {}, "outputs": [], "source": [ @@ -221,7 
+221,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f667cb7d", + "id": "478d114f", "metadata": {}, "outputs": [], "source": [ @@ -230,7 +230,7 @@ }, { "cell_type": "markdown", - "id": "81251ef8", + "id": "5b2d8caf", "metadata": {}, "source": [ "## You can look for the MAF scores in an interval" @@ -239,7 +239,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dababf3a", + "id": "efa19fcc", "metadata": {}, "outputs": [], "source": [ diff --git a/scripts/README.md b/scripts/README.md index 05d6bc9..356d993 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -12,7 +12,7 @@ database_location: "test_out" # where to create the database, make sure you have gnomad_vcf_location: "data" # where are your *.vcf.bgz located tables_location: "test_out" # where to store the preprocessed intermediate files, you can leave it like this script_locations: "test_out" # where to store the scripts, where you can check the progress of your jobs, you can leave it like this -genome: "Grch37" # genome version of the gnomAD vcf file (2.1.1 = Grch37, 3.1.1 = Grch38) +gnomad_version: "v4" # gnomAD release version of the vcf file (e.g., v2, v3, v4) ``` Once this is done, run diff --git a/scripts/createTSVtables.ipynb b/scripts/createTSVtables.ipynb index a30121e..0cb830b 100644 --- a/scripts/createTSVtables.ipynb +++ b/scripts/createTSVtables.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "24fdca4a", + "execution_count": null, + "id": "04c119c1", "metadata": { "papermill": { "duration": 0.336842, @@ -28,8 +28,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "bf59473a", + "execution_count": null, + "id": "713cfb12", "metadata": { "papermill": { "duration": 0.336842, @@ -47,8 +47,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "5a4ad452", + "execution_count": null, + "id": "a88b05a7", "metadata": { "papermill": { "duration": 0.014665, @@ -70,8 +70,8 @@ }, { "cell_type": "code", - "execution_count":
9, - "id": "dbfa6122", + "execution_count": null, + "id": "fd814421", "metadata": { "papermill": { "duration": 0.014665, @@ -82,34 +82,7 @@ }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19\n" - ] - }, - { - "data": { - "text/plain": [ - "['CHROM',\n", - " 'POS',\n", - " 'REF',\n", - " 'ALT',\n", - " 'FILTER',\n", - " 'AC',\n", - " 'AN',\n", - " 'AF',\n", - " 'InbreedingCoeff',\n", - " 'MQ']" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "with open(\"gnomad_db/pkgdata/gnomad_columns.yaml\") as f:\n", " columns = yaml.load(f, Loader=yaml.FullLoader)\n", @@ -120,8 +93,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "3a9be105", + "execution_count": null, + "id": "b8884615", "metadata": { "papermill": { "duration": 0.014665, @@ -132,48 +105,7 @@ }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "24\n" - ] - }, - { - "data": { - "text/plain": [ - "['/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr19.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr1.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chrY.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr8.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr21.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr3.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr14.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr9.vcf.bgz',\n", - " 
'/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr16.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr11.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr20.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr5.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr2.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr10.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr4.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr12.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chrX.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr17.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr13.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr6.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr15.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr18.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr22.vcf.bgz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr7.vcf.bgz']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# get gnomAD files\n", "files = glob.glob(f\"{gnomad_vcf_location}/*.bgz\")\n", @@ -183,8 +115,8 @@ }, { "cell_type": "code", - 
"execution_count": 11, - "id": "9c94c5b3", + "execution_count": null, + "id": "62457d3f", "metadata": { "papermill": { "duration": 0.008922, @@ -195,41 +127,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "['/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr19.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr1.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chrY.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr8.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr21.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr3.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr14.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr9.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr16.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr11.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr20.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr5.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr2.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr10.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr4.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr12.tsv.gz',\n", - " 
'/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chrX.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr17.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr13.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr6.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr15.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr18.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr22.tsv.gz',\n", - " '/mnt/biocluster/praktikum/tutorium_19/gnomad_db/data/exomes/gnomad.exomes.v4.0.sites.chr7.tsv.gz']" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# write gnomAD files to these tables:\n", "tables_location = [f'{tables_location}/{file.split(\"/\")[-1].replace(\".vcf.bgz\", \"\")}.tsv.gz' for file in files]\n", @@ -238,8 +136,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "c6372e88", + "execution_count": null, + "id": "dd3dca18", "metadata": { "papermill": { "duration": 0.008863, @@ -250,18 +148,7 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "12" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cpu_count = int(multiprocessing.cpu_count())\n", "cpu_count" @@ -269,8 +156,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "742ef702", + "execution_count": null, + "id": "488eb2ba", "metadata": { "papermill": { "duration": 0.008863, @@ -296,8 +183,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "589e7a3a", + "execution_count": null, + "id": "9f099dcf", "metadata": { "papermill": { "duration": 0.329741, 
@@ -308,78 +195,7 @@ }, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "24it [00:00, 61.20it/s] \n" - ] - }, - { - "data": { - "text/plain": [ - "[None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n", - "(b'', b'/bin/sh: 1: bcftools: not found\\n')\n" - ] - } - ], + "outputs": [], "source": [ "# run bcftools in parallel\n", 
"Parallel(cpu_count)(delayed(create_table)(file, table_location) for file, table_location in tqdm(zip(files, tables_location)))" @@ -388,7 +204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46394a5c", + "id": "f8103d87", "metadata": {}, "outputs": [], "source": [] diff --git a/test_dir/test_gnomad_db.py b/test_dir/test_gnomad_db.py index 3970b33..2d257ae 100644 --- a/test_dir/test_gnomad_db.py +++ b/test_dir/test_gnomad_db.py @@ -9,10 +9,10 @@ def database(): with open("script_config.yaml", 'r') as stream: config = yaml.safe_load(stream) - genome = config["genome"] + gnomad_version = config["gnomad_version"] database_location = config['database_location'] - database = gnomAD_DB(database_location, genome=genome) + database = gnomAD_DB(database_location, gnomad_version=gnomad_version) var_df = pd.read_csv("data/test_vcf_gnomad_chr21_10000.tsv.gz", sep="\t", names=database.columns, index_col=False)