Skip to content

Commit

Permalink
clean nb
Browse files Browse the repository at this point in the history
  • Loading branch information
Kalin Nonchev committed Nov 4, 2023
1 parent 6984c34 commit 92e25b9
Showing 1 changed file with 35 additions and 105 deletions.
140 changes: 35 additions & 105 deletions scripts/GettingStartedwithGnomAD_DB.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "ac0fca47",
"execution_count": null,
"id": "d6bb7fee",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -14,7 +14,7 @@
},
{
"cell_type": "markdown",
"id": "acdaa43f",
"id": "29f709fc",
"metadata": {},
"source": [
"# Download SQLite preprocessed files\n",
Expand All @@ -26,53 +26,29 @@
},
{
"cell_type": "code",
"execution_count": 8,
"id": "13b2eb85",
"execution_count": null,
"id": "a4f2b492",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting downloading...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"gnomad_db_wes_v4.0.sqlite3.gz?download=1: 7.31GB [12:40, 9.62MB/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting unzipping. This can take some time...\n",
"Database location: test_dir/gnomad_db.sqlite3\n",
"Done!\n"
]
}
],
"outputs": [],
"source": [
"# uncomment if you actually want to download it\n",
"download_link = \"https://zenodo.org/records/10066310/files/gnomad_db_wes_v4.0.sqlite3.gz?download=1\"\n",
"output_dir = \"test_dir\" # database_location\n",
"gnomAD_DB.download_and_unzip(download_link, output_dir) "
"# download_link = \"https://zenodo.org/records/10066310/files/gnomad_db_wes_v4.0.sqlite3.gz?download=1\"\n",
"# output_dir = \"test_dir\" # database_location\n",
"# gnomAD_DB.download_and_unzip(download_link, output_dir) "
]
},
{
"cell_type": "markdown",
"id": "aab41c34",
"id": "fd567b25",
"metadata": {},
"source": [
"# Initialize Database"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "dc9b3e8c",
"execution_count": null,
"id": "d31b697b",
"metadata": {
"tags": [
"parameters"
Expand All @@ -86,32 +62,18 @@
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7f430055",
"execution_count": null,
"id": "726b2d8e",
"metadata": {},
"outputs": [
{
"ename": "AssertionError",
"evalue": "We don't support this version: v45. Please select one fo the following ones: ['v2', 'v3', 'v4']",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/31932/ipykernel_1570523/790803492.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# initialize database\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgnomAD_DB\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatabase_location\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"v45\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/code/gnomAD_DB/gnomad_db/database.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, genodb_path, gnomad_version, parallel, cpu_count)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_gnomad_version\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"base_columns\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/code/gnomAD_DB/gnomad_db/database.py\u001b[0m in \u001b[0;36m_parse_gnomad_version\u001b[0;34m(self, gnomad_version, supported_gnomad_versions)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msupported_gnomad_versions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf\"We don't support this version: {gnomad_version}. Please select one fo the following ones: {supported_gnomad_versions}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAssertionError\u001b[0m: We don't support this version: v45. Please select one fo the following ones: ['v2', 'v3', 'v4']"
]
}
],
"outputs": [],
"source": [
"# initialize database\n",
"db = gnomAD_DB(database_location, gnomad_version=\"v45\")"
"db = gnomAD_DB(database_location, gnomad_version=\"v4\")"
]
},
{
"cell_type": "markdown",
"id": "a9e3006f",
"id": "5294a85b",
"metadata": {},
"source": [
"# Insert gnomAD variants into the database from a single TSV file\n",
Expand All @@ -121,7 +83,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8def6a52",
"id": "0ab60f6c",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -136,7 +98,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "34dbb770",
"id": "28d2319d",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -146,7 +108,7 @@
},
{
"cell_type": "markdown",
"id": "7a9243d2",
"id": "8a27dc55",
"metadata": {},
"source": [
"# Query MAF"
Expand All @@ -155,7 +117,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "835e50b4",
"id": "17e08a76",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -166,7 +128,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "eb3a308d",
"id": "939be8c0",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -176,7 +138,7 @@
},
{
"cell_type": "markdown",
"id": "b879dad5",
"id": "5832ab61",
"metadata": {},
"source": [
"## You can pass a dataframe with variants\n",
Expand All @@ -186,7 +148,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "36014921",
"id": "d07eacc3",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -196,7 +158,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a7bfea3b",
"id": "60403b2b",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -206,7 +168,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0aaa8a58",
"id": "3d0cd436",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -216,7 +178,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "88c20646",
"id": "9e7b8572",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -231,7 +193,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ef74e6bd",
"id": "0e81620c",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -240,58 +202,26 @@
},
{
"cell_type": "markdown",
"id": "b4261ffd",
"id": "efeec779",
"metadata": {},
"source": [
"## You can pass a single string as a variant"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "084c732a",
"execution_count": null,
"id": "f6162668",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"chrom 10\n",
"pos 95606780\n",
"ref A\n",
"alt C\n",
"filter PASS\n",
"AC 2.0\n",
"AN 628768.0\n",
"AF 0.000003\n",
"MQ 60.0\n",
"QD 12.1796\n",
"ReadPosRankSum 0.365\n",
"VarDP 412.0\n",
"AS_VQSLOD 5.5239\n",
"AC_grpmax 2.0\n",
"AN_grpmax 350092.0\n",
"AF_grpmax 0.000006\n",
"AF_eas 0.0\n",
"AF_nfe 0.000006\n",
"AF_fin 0.0\n",
"AF_afr 0.0\n",
"AF_asj 0.0\n",
"Name: 0, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"db.get_info_from_str(\"10:95606780:A>C\", \"*\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e72d5071",
"id": "7d487510",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -300,7 +230,7 @@
},
{
"cell_type": "markdown",
"id": "b2cd63fc",
"id": "d85b4921",
"metadata": {},
"source": [
"## You can look for the MAF scores in an interval"
Expand All @@ -309,7 +239,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0e587bf1",
"id": "125170f7",
"metadata": {},
"outputs": [],
"source": [
Expand Down

0 comments on commit 92e25b9

Please sign in to comment.