From 92e25b997279a6ae100ed811b8f9d22110652c41 Mon Sep 17 00:00:00 2001 From: Kalin Nonchev Date: Sat, 4 Nov 2023 10:24:23 +0100 Subject: [PATCH] clean nb --- scripts/GettingStartedwithGnomAD_DB.ipynb | 140 ++++++---------------- 1 file changed, 35 insertions(+), 105 deletions(-) diff --git a/scripts/GettingStartedwithGnomAD_DB.ipynb b/scripts/GettingStartedwithGnomAD_DB.ipynb index 66229fa..2cdc5f3 100644 --- a/scripts/GettingStartedwithGnomAD_DB.ipynb +++ b/scripts/GettingStartedwithGnomAD_DB.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "ac0fca47", + "execution_count": null, + "id": "d6bb7fee", "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "acdaa43f", + "id": "29f709fc", "metadata": {}, "source": [ "# Download SQLite preprocessed files\n", @@ -26,44 +26,20 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "13b2eb85", + "execution_count": null, + "id": "a4f2b492", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting downloading...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "gnomad_db_wes_v4.0.sqlite3.gz?download=1: 7.31GB [12:40, 9.62MB/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting unzipping. This can take some time...\n", - "Database location: test_dir/gnomad_db.sqlite3\n", - "Done!\n" - ] - } - ], + "outputs": [], "source": [ "# uncomment if you actually want to download it\n", - "download_link = \"https://zenodo.org/records/10066310/files/gnomad_db_wes_v4.0.sqlite3.gz?download=1\"\n", - "output_dir = \"test_dir\" # database_location\n", - "gnomAD_DB.download_and_unzip(download_link, output_dir) " + "# download_link = \"https://zenodo.org/records/10066310/files/gnomad_db_wes_v4.0.sqlite3.gz?download=1\"\n", + "# output_dir = \"test_dir\" # database_location\n", + "# gnomAD_DB.download_and_unzip(download_link, output_dir) " ] }, { "cell_type": "markdown", - "id": "aab41c34", + "id": "fd567b25", "metadata": {}, "source": [ "# Initialize Database" @@ -71,8 +47,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "dc9b3e8c", + "execution_count": null, + "id": "d31b697b", "metadata": { "tags": [ "parameters" @@ -86,32 +62,18 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "7f430055", + "execution_count": null, + "id": "726b2d8e", "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "We don't support this version: v45. Please select one fo the following ones: ['v2', 'v3', 'v4']", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/user/31932/ipykernel_1570523/790803492.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# initialize database\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgnomAD_DB\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatabase_location\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"v45\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/code/gnomAD_DB/gnomad_db/database.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, genodb_path, gnomad_version, parallel, cpu_count)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_gnomad_version\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"base_columns\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/code/gnomAD_DB/gnomad_db/database.py\u001b[0m in \u001b[0;36m_parse_gnomad_version\u001b[0;34m(self, gnomad_version, supported_gnomad_versions)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgnomad_version\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mgnomad_version\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msupported_gnomad_versions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf\"We don't support this version: {gnomad_version}. Please select one fo the following ones: {supported_gnomad_versions}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgnomad_version\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAssertionError\u001b[0m: We don't support this version: v45. Please select one fo the following ones: ['v2', 'v3', 'v4']" - ] - } - ], + "outputs": [], "source": [ "# initialize database\n", - "db = gnomAD_DB(database_location, gnomad_version=\"v45\")" + "db = gnomAD_DB(database_location, gnomad_version=\"v4\")" ] }, { "cell_type": "markdown", - "id": "a9e3006f", + "id": "5294a85b", "metadata": {}, "source": [ "# Insert gnomAD variants into the database from single tsv file\n", @@ -121,7 +83,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8def6a52", + "id": "0ab60f6c", "metadata": {}, "outputs": [], "source": [ @@ -136,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34dbb770", + "id": "28d2319d", "metadata": {}, "outputs": [], "source": [ @@ -146,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "7a9243d2", + "id": "8a27dc55", "metadata": {}, "source": [ "# Query MAF" @@ -155,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "835e50b4", + "id": "17e08a76", "metadata": {}, "outputs": [], "source": [ @@ -166,7 +128,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb3a308d", + "id": "939be8c0", "metadata": {}, "outputs": [], "source": [ @@ -176,7 +138,7 @@ }, { "cell_type": "markdown", - "id": "b879dad5", + "id": "5832ab61", "metadata": {}, "source": [ "## You can pass a dataframe with variants\n", @@ -186,7 +148,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36014921", + "id": "d07eacc3", "metadata": {}, "outputs": [], "source": [ @@ -196,7 +158,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7bfea3b", + "id": "60403b2b", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +168,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0aaa8a58", + "id": "3d0cd436", "metadata": {}, "outputs": [], "source": [ @@ -216,7 +178,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88c20646", + "id": "9e7b8572", "metadata": {}, "outputs": [], "source": [ @@ -231,7 +193,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ef74e6bd", + "id": "0e81620c", "metadata": {}, "outputs": [], "source": [ @@ -240,7 +202,7 @@ }, { "cell_type": "markdown", - "id": "b4261ffd", + "id": "efeec779", "metadata": {}, "source": [ "## You can pass a single string as a variant" @@ -248,42 +210,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "084c732a", + "execution_count": null, + "id": "f6162668", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "chrom 10\n", - "pos 95606780\n", - "ref A\n", - "alt C\n", - "filter PASS\n", - "AC 2.0\n", - "AN 628768.0\n", - "AF 0.000003\n", - "MQ 60.0\n", - "QD 12.1796\n", - "ReadPosRankSum 0.365\n", - "VarDP 412.0\n", - "AS_VQSLOD 5.5239\n", - "AC_grpmax 2.0\n", - "AN_grpmax 350092.0\n", - "AF_grpmax 0.000006\n", - "AF_eas 0.0\n", - "AF_nfe 0.000006\n", - "AF_fin 0.0\n", - "AF_afr 0.0\n", - "AF_asj 0.0\n", - "Name: 0, dtype: object" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "db.get_info_from_str(\"10:95606780:A>C\", \"*\")" ] @@ -291,7 +221,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e72d5071", + "id": "7d487510", "metadata": {}, "outputs": [], "source": [ @@ -300,7 +230,7 @@ }, { "cell_type": "markdown", - "id": "b2cd63fc", + "id": "d85b4921", "metadata": {}, "source": [ "## You can look for the MAF scores in an interval" @@ -309,7 +239,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e587bf1", + "id": "125170f7", "metadata": {}, "outputs": [], "source": [